55
66import pandas as pd
77
8+ from activity_browser .bwutils .settings import Settings
89from .fields import all_fields , all_types
910
1011
@@ -101,22 +102,22 @@ def flush_mutations(self) -> tuple[set[tuple[str, str]], set[tuple[str, str]], s
101102 self ._updated .clear ()
102103 self ._deleted .clear ()
103104
104- cache_path = filesystem .get_project_ab_path () / "metadatastore_cache.pkl"
105- self ._dataframe .to_pickle (cache_path )
105+ if Settings ()["metadatastore" ]["caching_enabled" ]:
106+ cache_path = filesystem .get_project_ab_path () / "metadatastore_cache.pkl"
107+ self ._dataframe .to_pickle (cache_path )
106108
107109 return added , updated , deleted
108110
109111 def match (self , ** kwargs : dict [str , str ]) -> pd .DataFrame :
110112 """Return a slice of the dataframe matching the criteria.
111113 """
112- with self ._df_lock :
113- df = self ._dataframe .query (
114- " and " .join (
115- [
116- f"`{ key } `.astype('str') == { str (value )!r} " if not pd .isna (value ) else f"`{ key } `.isnull()"
117- for key , value in kwargs .items ()
118- ])
119- )
114+ df = self ._dataframe .query (
115+ " and " .join (
116+ [
117+ f"`{ key } `.astype('str') == { str (value )!r} " if not pd .isna (value ) else f"`{ key } `.isnull()"
118+ for key , value in kwargs .items ()
119+ ])
120+ )
120121
121122 return df
122123
@@ -142,23 +143,85 @@ def get_database_metadata(self, db_name: str, columns: list = None) -> pd.DataFr
142143 df = self ._dataframe .loc [[db_name ], columns ]
143144 return df .reindex (columns , axis = "columns" )
144145
146+ def _pandas_search (self , query : str , database : str = None , columns : list = None ) -> pd .DataFrame :
147+ """Fallback pandas-based search when searcher is not initialized.
148+
149+ Args:
150+ query: Search query string, may contain key:value parameters
151+ database: Optional database name to restrict search
152+ columns: Optional list of columns to return
153+
154+ Returns:
155+ DataFrame with matching results
156+ """
157+ params , clean_query = get_query_parameters (query )
158+ columns = columns if columns is not None else all_fields
159+
160+ # Start with the full dataframe or database subset
161+ if database and database in self .databases :
162+ df = self ._dataframe .loc [[database ]]
163+ else :
164+ df = self ._dataframe
165+
166+ if not clean_query .strip ():
167+ # If no search query, just filter by parameters
168+ if params :
169+ extra_query = " & " .join (
170+ [
171+ f"`{ key } `.astype('str').str.contains('{ value } ', case=False)"
172+ for key , value in params .items ()
173+ if key in df .columns
174+ ]
175+ )
176+ if extra_query :
177+ df = df .query (extra_query )
178+ return df [columns ]
179+
180+ # Search across text fields: name, product, synonyms, categories, unit, location
181+ search_fields = ['name' , 'product' , 'synonyms' , 'categories' , 'unit' , 'location' , 'CAS number' ]
182+ mask = pd .Series ([False ] * len (df ), index = df .index )
183+
184+ for field in search_fields :
185+ if field in df .columns :
186+ # Case-insensitive search
187+ mask |= df [field ].astype (str ).str .contains (clean_query , case = False , na = False )
188+
189+ df = df [mask ]
190+
191+ # Apply additional parameter filters if any
192+ if params :
193+ extra_query = " & " .join (
194+ [
195+ f"`{ key } `.astype('str').str.contains('{ value } ', case=False)"
196+ for key , value in params .items ()
197+ if key in df .columns
198+ ]
199+ )
200+ if extra_query :
201+ df = df .query (extra_query )
202+
203+ return df [columns ] if columns else df
204+
145205 def search (self , query : str , columns : list = None ) -> pd .DataFrame :
146- if not self .searcher :
147- logger .warning (f"Attempted to search metadata before searcher was initialized." )
148- return pd .DataFrame (columns = columns or all_fields )
206+ if self .searcher :
207+ # Advanced searcher is initialized, so use that
208+ params , query = get_query_parameters (query )
209+ result = self .searcher .search (query )
210+ return self ._meta_from_result (params , result , columns )
149211
150- params , query = get_query_parameters ( query )
151- result = self . searcher . search ( query )
152- return self ._meta_from_result ( params , result , columns )
212+ # Fallback to simple pandas search
213+ logger . debug ( "Using simple pandas search as searcher is not initialized." )
214+ return self ._pandas_search ( query , columns = columns )
153215
154216 def search_database (self , query : str , database : str , columns : list = None ) -> pd .DataFrame :
155- if not self .searcher :
156- logger .warning (f"Attempted to search metadata before searcher was initialized." )
157- return pd .DataFrame (columns = columns or all_fields )
158-
159- params , query = get_query_parameters (query )
160- result = self .searcher .fuzzy_search (query , database = database )
161- return self ._meta_from_result (params , result , columns )
217+ if self .searcher :
218+ params , query = get_query_parameters (query )
219+ result = self .searcher .fuzzy_search (query , database = database )
220+ return self ._meta_from_result (params , result , columns )
221+
222+ # Fallback to simple pandas search
223+ logger .debug (f"Using simple pandas search for database '{ database } ' as searcher is not initialized." )
224+ return self ._pandas_search (query , database = database , columns = columns )
162225
163226 def _meta_from_result (self , params : dict , result : list [int ], columns : list = None ) -> pd .DataFrame :
164227 df = self ._dataframe .loc [self .dataframe ["id" ].isin (result ), columns or all_fields ]
0 commit comments