Module meteor.rsmap
Map class definition and related functions
Functions
def assert_is_map(obj: Any, *, require_uncertainties: bool) ‑> None
-
Expand source code
def assert_is_map(obj: Any, *, require_uncertainties: bool) -> None:
    """
    Check that `obj` is a `Map`, optionally also requiring an uncertainty column.

    Parameters
    ----------
    obj: Any
        The object to check.
    require_uncertainties: bool
        If True, additionally require `obj.has_uncertainties` to be truthy.

    Raises
    ------
    TypeError
        If `obj` is not an instance of `Map`.
    MissingUncertaintiesError
        If uncertainties are required but `obj.has_uncertainties` is falsy.
    """
    if isinstance(obj, Map):
        # `has_uncertainties` is only consulted when uncertainties are actually required
        if not require_uncertainties or obj.has_uncertainties:
            return
        msg = f"{obj} Map missing required uncertainty column"
        raise MissingUncertaintiesError(msg)

    msg = f"expected {obj} to be a rsmap.Map, got {type(obj)}"
    raise TypeError(msg)
Classes
class Map (data: dict | pd.DataFrame | rs.DataSet,
*,
amplitude_column: str = 'F',
phase_column: str = 'PHI',
uncertainty_column: str | None = 'SIGF',
**kwargs: Any)-
Expand source code
class Map(rs.DataSet):
    """
    A high-level interface for a crystallographic map of any kind.

    Specifically, this class is based on a reciprocalspaceship `DataSet` (i.e. a
    crystallographically-aware pandas `DataFrame`), but is restricted to three and only three
    special columns corresponding to:

      - (real) `amplitudes`
      - `phases`
      - `uncertainties`, i.e. the standard deviation for a Gaussian around the amplitude mean

    In addition, the class maintains an `index` of Miller indices, as well as the
    crystallographic metadata supported by `rs.DataSet`, most notably a `cell` and `spacegroup`.

    These structured data enable this class to perform some routine map-based calculations,
    such as

      - computing a real-space map, or computing map coefficients from a map
      - converting between complex Cartesian and polar (amplitude/phase) structure factors
      - reading and writing mtz and ccp4 map files

    all in a way that automatically facilitates the bookkeeping tasks normally associated with
    these relatively simple operations.
    """

    # these columns are always allowed
    _allowed_columns: ClassVar[list[str]] = ["H", "K", "L"]

    # in addition, __init__ specifies 3 columns special that can be named dynamically to support:
    # amplitudes, phases, uncertainties; all other columns are forbidden

    @cellify
    @spacegroupify
    def __init__(
        self,
        data: dict | pd.DataFrame | rs.DataSet,
        *,
        amplitude_column: str = "F",
        phase_column: str = "PHI",
        uncertainty_column: str | None = "SIGF",
        **kwargs: Any,
    ) -> None:
        """
        Build a `Map` from tabular `data`, keeping only the allowed and special columns.

        Parameters
        ----------
        data: dict | pd.DataFrame | rs.DataSet
            Input data, forwarded to the `rs.DataSet` constructor.
        amplitude_column: str
            Name of the amplitude column; must be present in `data`.
        phase_column: str
            Name of the phase column; must be present in `data`.
        uncertainty_column: str | None
            Name of the uncertainty column; kept only if present in `data`.
        **kwargs: Any
            Forwarded to the `rs.DataSet` constructor.

        Raises
        ------
        KeyError
            If the amplitude or phase column is missing from `data`.
        """
        super().__init__(data=data, **kwargs)

        self._amplitude_column = amplitude_column
        self._phase_column = phase_column
        self._uncertainty_column = uncertainty_column

        for column in [self._amplitude_column, self._phase_column]:
            if column not in self.columns:
                msg = "amplitude and phase columns must be in input `data`... "
                msg += f"looking for `{column}`, found `{self.columns}`"
                raise KeyError(msg)

        columns_to_keep = [*self._allowed_columns, amplitude_column, phase_column]
        if uncertainty_column and (uncertainty_column in self.columns):
            columns_to_keep.append(uncertainty_column)

        # this feels dangerous, but I cannot find a better way | author: @tjlane
        excess_columns = set(self.columns) - set(columns_to_keep)
        for column in excess_columns:
            del self[column]

        # ensure types correct
        self.amplitudes = self._verify_amplitude_type(self.amplitudes, fix=True)
        self.phases = self._verify_phase_type(self.phases, fix=True)
        if self.has_uncertainties:
            self.uncertainties = self._verify_uncertainty_type(self.uncertainties, fix=True)

    @property
    def _constructor(self) -> Callable[[Any], Map]:
        """Return a factory that builds a `Map` carrying over this instance's column names."""

        def constructor_fxn(*args: Any, **kwargs: Any) -> Map:
            return Map(
                *args,
                amplitude_column=self._amplitude_column,
                phase_column=self._phase_column,
                uncertainty_column=self._uncertainty_column,
                **kwargs,
            )

        return constructor_fxn

    @property
    def _constructor_sliced(self) -> Callable[[Any], rs.DataSeries]:
        """Return the type used for a single column: `rs.DataSeries`."""
        return rs.DataSeries

    def _verify_type(
        self,
        name: str,
        allowed_types: list[type],
        dataseries: rs.DataSeries,
        *,
        fix: bool,
        cast_fix_to: type,
    ) -> rs.DataSeries:
        """
        Check that `dataseries.dtype` is in `allowed_types`.

        If it is not, either cast to `cast_fix_to` (when `fix` is True) or raise an
        `AssertionError` naming the offending dtype.
        """
        if dataseries.dtype not in allowed_types:
            if fix:
                return dataseries.astype(cast_fix_to)
            msg = f"dtype for passed {name} not allowed, got: {dataseries.dtype} allow {allowed_types}"
            raise AssertionError(msg)
        return dataseries

    def _verify_amplitude_type(
        self,
        dataseries: rs.DataSeries,
        *,
        fix: bool = True,
    ) -> rs.DataSeries:
        """Verify (or, if `fix`, cast to `StructureFactorAmplitudeDtype`) an amplitude series."""
        name = "amplitude"
        amplitude_dtypes = [
            rs.StructureFactorAmplitudeDtype(),
            rs.FriedelStructureFactorAmplitudeDtype(),
            rs.NormalizedStructureFactorAmplitudeDtype(),
            rs.AnomalousDifferenceDtype(),
        ]
        return self._verify_type(
            name,
            amplitude_dtypes,
            dataseries,
            fix=fix,
            cast_fix_to=rs.StructureFactorAmplitudeDtype(),
        )

    def _verify_phase_type(self, dataseries: rs.DataSeries, *, fix: bool = True) -> rs.DataSeries:
        """Verify (or, if `fix`, cast to `PhaseDtype`) a phase series."""
        name = "phase"
        phase_dtypes = [rs.PhaseDtype()]
        return self._verify_type(
            name, phase_dtypes, dataseries, fix=fix, cast_fix_to=rs.PhaseDtype()
        )

    def _verify_uncertainty_type(
        self,
        dataseries: rs.DataSeries,
        *,
        fix: bool = True,
    ) -> rs.DataSeries:
        """Verify (or, if `fix`, cast to `StandardDeviationDtype`) an uncertainty series."""
        name = "uncertainties"
        uncertainty_dtypes = [
            rs.StandardDeviationDtype(),
            rs.StandardDeviationFriedelIDtype(),
            rs.StandardDeviationFriedelSFDtype(),
        ]
        return self._verify_type(
            name, uncertainty_dtypes, dataseries, fix=fix, cast_fix_to=rs.StandardDeviationDtype()
        )

    def __setitem__(self, key: str, value: Any) -> None:
        """Assign to an existing column or to one of `_allowed_columns`; reject anything else."""
        allowed = list(self.columns) + self._allowed_columns
        if key not in allowed:
            msg = "column assignment not allowed for Map objects"
            raise MapMutabilityError(msg)
        super().__setitem__(key, value)

    def insert(self, loc: int, column: str, value: Any, *, allow_duplicates: bool = False) -> None:
        """
        Insert a column, but only if its name is one of `_allowed_columns`.

        Raises
        ------
        MapMutabilityError
            If `column` is not in `_allowed_columns`.
        """
        if column in self._allowed_columns:
            super().insert(loc, column, value, allow_duplicates=allow_duplicates)
        else:
            # the "; " separator was previously missing, fusing two clauses of the message
            msg = "general column assignment not allowed for Map objects; "
            msg += f"special columns allowed: {self._allowed_columns}; "
            msg += "see also Map.set_uncertainties(...)"
            raise MapMutabilityError(msg)

    @overload
    def drop(self, labels: Any, *, inplace: Literal[True]) -> None: ...

    @overload
    def drop(self, labels: Any, *, inplace: Literal[False]) -> Map: ...

    def drop(self, labels: Any, *, inplace: bool = False) -> None | Map:
        """Drop `labels` along the index axis only (the parent is always called with axis="index")."""
        return super().drop(labels=labels, axis="index", columns=None, inplace=inplace)

    def get_hkls(self) -> np.ndarray:
        """
        Return the Miller indices as an int32 array whose last dimension has length 3.

        The indices are read from the index if its names are `H`, `K`, `L`; otherwise from
        columns with those names.

        Raises
        ------
        ValueError
            If `H`, `K`, `L` are found neither in the index nor in the columns.
        RuntimeError
            If the resulting array's last dimension is not
            `NUMBER_OF_DIMENSIONS_IN_UNIVERSE`.
        """
        # overwrite rs implt'n, return w/o modifying self -> same behavior, under testing - @tjlane
        # this is a rather horrible thing to do and we should fix it
        # best is to push changes upstream
        hkl_names = ["H", "K", "L"]
        if self.index.names == hkl_names:
            hkls = self.index.to_frame().to_numpy(dtype=np.int32)
        elif all(col in self.columns for col in hkl_names):
            # we need to pull out each column as a separate DataSeries so that we don't try to
            # create a new Map object without F, PHI
            hkls = np.vstack([self[col].to_numpy(dtype=np.int32) for col in hkl_names]).T
        else:
            msg = f"cannot find `H`, `K`, and `L` columns in index or columns {self.columns}"
            raise ValueError(msg)

        if hkls.shape[-1] != NUMBER_OF_DIMENSIONS_IN_UNIVERSE:
            msg = f"something went wrong, HKL array has a funny shape: {hkls.shape}"
            raise RuntimeError(msg)

        return hkls

    def compute_dHKL(self) -> rs.DataSeries:  # noqa: N802, caps from reciprocalspaceship
        """
        Return the d-spacings for this map's Miller indices as a new `rs.DataSeries`.

        The values come from `self.cell.calculate_d_array(self.get_hkls())`; no column is
        added to `self`.

        Raises
        ------
        AttributeError
            If the object has no `cell` attribute.
        """
        # rs adds a "dHKL" column to the DataFrame
        # that could be enabled by adding "dHKL" to _allowed_columns - @tjlane
        if not hasattr(self, "cell"):
            msg = "no `cell` attribute set, cannot compute resolution (d-values)"
            raise AttributeError(msg)

        d_hkl = self.cell.calculate_d_array(self.get_hkls())
        return rs.DataSeries(d_hkl, dtype="R", index=self.index)

    @property
    def resolution_limits(self) -> tuple[float, float]:
        """Return `(max, min)` of the d-spacings computed by `compute_dHKL`."""
        d_hkl = self.compute_dHKL()
        return np.max(d_hkl), np.min(d_hkl)

    @property
    def amplitudes(self) -> rs.DataSeries:
        """The amplitude column."""
        return self[self._amplitude_column]

    @amplitudes.setter
    def amplitudes(self, values: rs.DataSeries) -> None:
        values = self._verify_amplitude_type(values)
        self[self._amplitude_column] = values

    @property
    def amplitude_column_name(self) -> str:
        """The name of the amplitude column."""
        return self._amplitude_column

    @property
    def phases(self) -> rs.DataSeries:
        """The phase column."""
        return self[self._phase_column]

    @phases.setter
    def phases(self, values: rs.DataSeries) -> None:
        values = self._verify_phase_type(values)
        self[self._phase_column] = values

    @property
    def phase_column_name(self) -> str:
        """The name of the phase column."""
        return self._phase_column

    @property
    def has_uncertainties(self) -> bool:
        """Whether the configured uncertainty column is present among the columns."""
        return self._uncertainty_column in self.columns

    @property
    def uncertainties(self) -> rs.DataSeries:
        """The uncertainty column; raises `AttributeError` if it is not set."""
        if self.has_uncertainties:
            return self[self._uncertainty_column]
        msg = "uncertainties not set for Map object"
        raise AttributeError(msg)

    @uncertainties.setter
    def uncertainties(self, values: rs.DataSeries) -> None:
        if self.has_uncertainties:
            values = self._verify_uncertainty_type(values)
            self[self._uncertainty_column] = values  # type: ignore[index]
        else:
            msg = "uncertainties unset, and Pandas forbids assignment via attributes; "
            msg += "to initialize, use Map.set_uncertainties(...)"
            raise AttributeError(msg)

    def set_uncertainties(self, values: rs.DataSeries, column_name: str = "SIGF") -> None:
        """
        Set the uncertainties, creating the uncertainty column if it does not exist yet.

        Parameters
        ----------
        values: rs.DataSeries
            The uncertainty values; passed through `_verify_uncertainty_type` first.
        column_name: str
            Name for a newly created column. Not used when an uncertainty column already
            exists, in which case the existing column is overwritten.

        Raises
        ------
        RuntimeError
            If a new column must be created but the object does not have exactly two columns.
        """
        values = self._verify_uncertainty_type(values)

        if self.has_uncertainties:
            self.uncertainties = values
            return

        # otherwise, create a new column
        number_of_columns = len(self.columns)
        number_of_columns_with_just_amplitudes_and_phases = 2
        if number_of_columns != number_of_columns_with_just_amplitudes_and_phases:
            msg = "Misconfigured columns"
            raise RuntimeError(msg)

        super().insert(number_of_columns, column_name, values, allow_duplicates=False)

        # record the new column name only once the insert has succeeded, so a failure above
        # cannot leave `_uncertainty_column` pointing at a column that was never created
        self._uncertainty_column = column_name

    @property
    def uncertainties_column_name(self) -> str:
        """The name of the uncertainty column; raises `AttributeError` if it is not set."""
        if self.has_uncertainties:
            if not isinstance(self._uncertainty_column, str):
                msg = "misconfigured uncertainty column"
                raise RuntimeError(msg)
            return self._uncertainty_column
        msg = "uncertainties not set for Map object"
        raise AttributeError(msg)

    def canonicalize_amplitudes(self) -> None:
        """Apply the module-level `canonicalize_amplitudes` to this map with `inplace=True`."""
        canonicalize_amplitudes(
            self,
            amplitude_column=self._amplitude_column,
            phase_column=self._phase_column,
            inplace=True,
        )

    def to_structurefactor(self) -> rs.DataSeries:
        """Return a DataSeries of complex structure factor amplitudes"""
        return super().to_structurefactor(self._amplitude_column, self._phase_column)

    @overload
    @classmethod
    def from_structurefactor(
        cls,
        complex_structurefactor: rs.DataSeries,
        *,
        index: None = None,
        cell: CellType | None = None,
        spacegroup: SpacegroupType | None = None,
    ) -> Map: ...

    @overload
    @classmethod
    def from_structurefactor(
        cls,
        complex_structurefactor: np.ndarray,
        *,
        index: pd.Index,
        cell: CellType | None = None,
        spacegroup: SpacegroupType | None = None,
    ) -> Map: ...

    @classmethod
    @cellify("cell")
    @spacegroupify("spacegroup")
    def from_structurefactor(
        cls,
        complex_structurefactor: np.ndarray | rs.DataSeries,
        *,
        index: pd.Index | None = None,
        cell: CellType | None = None,
        spacegroup: SpacegroupType | None = None,
    ) -> Map:
        """
        Create a `Map` with columns `F` and `PHI` from complex structure factors.

        Amplitudes are `np.abs(...)` and phases are `np.angle(..., deg=True)` of the input.

        Parameters
        ----------
        complex_structurefactor: np.ndarray | rs.DataSeries
            The complex values. If a `DataSeries`, its own index is used when `index` is None.
        index: pd.Index | None
            Index for the result; required unless the input is a `DataSeries`.
        cell, spacegroup
            Passed on to the intermediate `rs.DataSet`.

        Raises
        ------
        ValueError
            If `index` is None and the input is not a `DataSeries`.
        ShapeMismatchError
            If `index` is given and its shape differs from the input's shape.
        """
        # 1. `rs.DataSet.from_structurefactor` exists, but it operates on a column that's already
        #    part of the dataset; having such a (redundant) column is forbidden by `Map`
        # 2. recprocalspaceship has a `from_structurefactor` method, but it is occasionally
        #    mangling indices for me when the input is a numpy array, as of 16 OCT 24
        #
        # hopefully we can resolve these and reuse code! - @tjlane

        if index is None:
            if isinstance(complex_structurefactor, rs.DataSeries):
                index = complex_structurefactor.index
            else:
                msg = "if `complex_structurefactor` is not a `DataSeries` with an `index` attribute"
                msg += ", an index must be provided"
                raise ValueError(msg)
        elif index.shape != complex_structurefactor.shape:
            msg = f"`complex_structurefactor` {complex_structurefactor.shape} does not have same "
            msg += f"shape as `index` {index.shape}"
            raise ShapeMismatchError(msg)

        amplitudes = rs.DataSeries(
            np.abs(complex_structurefactor),
            index=index,
            dtype=rs.StructureFactorAmplitudeDtype(),
            name="F",
        )
        phases = rs.DataSeries(
            np.angle(complex_structurefactor, deg=True),
            index=index,
            dtype=rs.PhaseDtype(),
            name="PHI",
        )

        dataset = rs.DataSet(
            rs.concat([amplitudes, phases], axis=1),
            index=index,
            cell=cell,
            spacegroup=spacegroup,
        )
        return cls(dataset)

    @classmethod
    def from_gemmi(
        cls,
        gemmi_mtz: gemmi.Mtz,
        *,
        amplitude_column: str = "F",
        phase_column: str = "PHI",
        uncertainty_column: str | None = "SIGF",
    ) -> Map:
        """Create a `Map` from a `gemmi.Mtz` object, using the given column names."""
        return cls(
            rs.DataSet(gemmi_mtz),
            amplitude_column=amplitude_column,
            phase_column=phase_column,
            uncertainty_column=uncertainty_column,
        )

    @classmethod
    @cellify("cell")
    def from_3d_numpy_map(
        cls, map_grid: np.ndarray, *, spacegroup: Any, cell: CellType, high_resolution_limit: float
    ) -> Map:
        """
        Create a `Map` from a 3d grid of voxel values stored in a numpy array.

        Parameters
        ----------
        map_grid: np.ndarray
            The array, laid out in Gemmi format
        spacegroup: Any
            Specifies which spacegroup, can be an int, gemmi.SpaceGroup, ...
        cell
            Specifies cell, can be a tuple, gemmi.Cell, ...
        high_resolution_limit: float
            The resolution of the map, regardless of the sampling; we need this to infer the
            map sampling

        Returns
        -------
        map: Map
            The map coefficients

        See Also
        --------
        For information about Gemmi data layout: https://gemmi.readthedocs.io/en/latest/grid.html
        """
        if len(map_grid.shape) != NUMBER_OF_DIMENSIONS_IN_UNIVERSE:
            msg = "`map_grid` should be a 3D array representing a realspace map"
            raise ValueError(msg)

        ccp4 = numpy_array_to_map(
            map_grid,
            spacegroup=spacegroup,
            cell=cell,
        )
        return cls.from_ccp4_map(
            ccp4_map=ccp4,
            high_resolution_limit=high_resolution_limit,
        )

    def to_ccp4_map(self, *, map_sampling: int) -> gemmi.Ccp4Map:
        """Transform the amplitude/phase columns into a `gemmi.Ccp4Map` at the given sample rate."""
        map_coefficients_gemmi_format = self.to_gemmi()
        ccp4_map = gemmi.Ccp4Map()
        ccp4_map.grid = map_coefficients_gemmi_format.transform_f_phi_to_map(
            self._amplitude_column,
            self._phase_column,
            sample_rate=map_sampling,
        )
        ccp4_map.update_ccp4_header()
        return ccp4_map

    @classmethod
    def from_ccp4_map(
        cls,
        ccp4_map: gemmi.Ccp4Map,
        *,
        high_resolution_limit: float,
        amplitude_column: str = "F",
        phase_column: str = "PHI",
    ) -> Map:
        """
        Create a `Map` by transforming a `gemmi.Ccp4Map` grid into map coefficients.

        Parameters
        ----------
        ccp4_map: gemmi.Ccp4Map
            The real-space map; its `grid` is transformed.
        high_resolution_limit: float
            Resolution cutoff; `GEMMI_HIGH_RESOLUTION_BUFFER` is subtracted before use.
        amplitude_column, phase_column: str
            Names given to the resulting amplitude and phase columns.
        """
        # to ensure we include the final shell of reflections, add a small buffer to the resolution
        gemmi_structure_factors = gemmi.transform_map_to_f_phi(ccp4_map.grid, half_l=False)
        data = gemmi_structure_factors.prepare_asu_data(
            dmin=high_resolution_limit - GEMMI_HIGH_RESOLUTION_BUFFER,
            with_sys_abs=True,
        )

        mtz = gemmi.Mtz(with_base=True)
        mtz.spacegroup = gemmi_structure_factors.spacegroup
        mtz.set_cell_for_all(gemmi_structure_factors.unit_cell)
        mtz.add_dataset("FromMap")
        mtz.add_column(amplitude_column, "F")
        mtz.add_column(phase_column, "P")
        mtz.set_data(data)
        mtz.switch_to_asu_hkl()

        # Go through this class's own `from_gemmi`, which forwards the column names, instead of
        # `super().from_gemmi(mtz)`.
        # NOTE(review): the parent classmethod is invoked with `cls` bound to `Map`, so it
        # presumably builds an intermediate `Map` with the default `F`/`PHI` names and fails for
        # non-default names -- confirm against reciprocalspaceship.
        return cls.from_gemmi(mtz, amplitude_column=amplitude_column, phase_column=phase_column)

    def write_mtz(self, file_path: str | Path) -> None:
        """Write the map to an MTZ file; `file_path` is converted to `str` first."""
        path_cast_to_str = str(file_path)
        super().write_mtz(path_cast_to_str)

    @classmethod
    def read_mtz_file(
        cls,
        file_path: str | Path,
        *,
        amplitude_column: str = "F",
        phase_column: str = "PHI",
        uncertainty_column: str | None = "SIGF",
    ) -> Map:
        """Read an MTZ file with gemmi and build a `Map` from it via `from_gemmi`."""
        gemmi_mtz = gemmi.read_mtz_file(str(file_path))
        return cls.from_gemmi(
            gemmi_mtz,
            amplitude_column=amplitude_column,
            phase_column=phase_column,
            uncertainty_column=uncertainty_column,
        )
A high-level interface for a crystallographic map of any kind.
Specifically, this class is based on a reciprocalspaceship
DataSet
(i.e. a crystallographically-aware pandas DataFrame
), but is restricted to three and only three special columns corresponding to:- (real) <code>amplitudes</code> - <code>phases</code> - <code>uncertainties</code>, ie the standard deviation for a Gaussian around the amplitude mean
In addition, the class maintains an
index
of Miller indices, as well as the crystallographic metadata supported by rs.DataSet
, most notably a cell
and spacegroup
. These structured data enable this class to perform some routine map-based calculations, such as
- computing a real-space map, or computing map coefficients from a map - converting between complex Cartesian and polar (amplitude/phase) structure factors - reading and writing mtz and ccp4 map files
all in a way that automatically facilitates the bookkeeping tasks normally associated with these relatively simple operations.
Ancestors
- reciprocalspaceship.dataset.DataSet
- pandas.core.frame.DataFrame
- pandas.core.generic.NDFrame
- pandas.core.base.PandasObject
- pandas.core.accessor.DirNamesMixin
- pandas.core.indexing.IndexingMixin
- pandas.core.arraylike.OpsMixin
Static methods
def from_3d_numpy_map(cls,
map_grid: np.ndarray,
*,
spacegroup: Any,
cell: CellType,
high_resolution_limit: float) ‑> Map-
Expand source code
@classmethod
@cellify("cell")
def from_3d_numpy_map(
    cls, map_grid: np.ndarray, *, spacegroup: Any, cell: CellType, high_resolution_limit: float
) -> Map:
    """
    Build a `Map` from a numpy array holding a 3d grid of voxel values.

    Parameters
    ----------
    map_grid: np.ndarray
        The voxel array, laid out in Gemmi format
    spacegroup: Any
        The spacegroup, e.g. an int, gemmi.SpaceGroup, ...
    cell
        The cell, e.g. a tuple, gemmi.Cell, ...
    high_resolution_limit: float
        The resolution of the map, independent of the grid sampling; it is needed to infer
        the map sampling

    Returns
    -------
    map: Map
        The map coefficients

    Raises
    ------
    ValueError
        If `map_grid` does not have `NUMBER_OF_DIMENSIONS_IN_UNIVERSE` dimensions.

    See Also
    --------
    For information about Gemmi data layout: https://gemmi.readthedocs.io/en/latest/grid.html
    """
    grid_rank = len(map_grid.shape)
    if grid_rank != NUMBER_OF_DIMENSIONS_IN_UNIVERSE:
        msg = "`map_grid` should be a 3D array representing a realspace map"
        raise ValueError(msg)

    # go via an intermediate CCP4 map, then reuse the CCP4 -> Map conversion
    realspace_map = numpy_array_to_map(map_grid, spacegroup=spacegroup, cell=cell)
    return cls.from_ccp4_map(
        ccp4_map=realspace_map,
        high_resolution_limit=high_resolution_limit,
    )
Create a
Map
from a 3d grid of voxel values stored in a numpy array.Parameters
map_grid
:np.ndarray
- The array, laid out in Gemmi format
spacegroup
:Any
- Specifies which spacegroup, can be an int, gemmi.SpaceGroup, …
cell
- Specifies cell, can be a tuple, gemmi.Cell, …
high_resolution_limit
:float
- The resolution of the map, regardless of the sampling; we need this to infer the map sampling
Returns
map
:Map
- The map coefficients
See Also
For information about Gemmi data layout: <https://gemmi.readthedocs.io/en/latest/grid.html>
def from_ccp4_map(ccp4_map: gemmi.Ccp4Map,
*,
high_resolution_limit: float,
amplitude_column: str = 'F',
phase_column: str = 'PHI') ‑> Mapdef from_gemmi(gemmi_mtz: gemmi.Mtz,
*,
amplitude_column: str = 'F',
phase_column: str = 'PHI',
uncertainty_column: str | None = 'SIGF') ‑> Map-
Creates DataSet object from gemmi.Mtz object.
If the gemmi.Mtz object contains an M/ISYM column and contains duplicated Miller indices, an unmerged DataSet will be constructed. The Miller indices will be mapped to their observed values, and a partiality flag will be extracted and stored as a boolean column with the label,
PARTIAL
. Otherwise, a merged DataSet will be constructed.If columns are found with the
MTZInt
dtype and are labeledPARTIAL
orCENTRIC
, these will be interpreted as boolean flags used to label partial or centric reflections, respectively.Parameters
gemmiMtz
:gemmi.Mtz
Returns
DataSet
def from_structurefactor(cls,
complex_structurefactor: np.ndarray | rs.DataSeries,
*,
index: pd.Index | None = None,
cell: CellType | None = None,
spacegroup: SpacegroupType | None = None) ‑> Map-
Expand source code
@classmethod
@cellify("cell")
@spacegroupify("spacegroup")
def from_structurefactor(
    cls,
    complex_structurefactor: np.ndarray | rs.DataSeries,
    *,
    index: pd.Index | None = None,
    cell: CellType | None = None,
    spacegroup: SpacegroupType | None = None,
) -> Map:
    """
    Create a `Map` with columns `F` and `PHI` from complex structure factors.

    Amplitudes are `np.abs(...)` and phases are `np.angle(..., deg=True)` of the input.

    Parameters
    ----------
    complex_structurefactor: np.ndarray | rs.DataSeries
        The complex values. If a `DataSeries`, its own index is used when `index` is None.
    index: pd.Index | None
        Index for the result; required unless the input is a `DataSeries`.
    cell, spacegroup
        Passed on to the intermediate `rs.DataSet`.

    Raises
    ------
    ValueError
        If `index` is None and the input is not a `DataSeries`.
    ShapeMismatchError
        If `index` is given and its shape differs from the input's shape.
    """
    # 1. `rs.DataSet.from_structurefactor` exists, but it operates on a column that's already
    #    part of the dataset; having such a (redundant) column is forbidden by `Map`
    # 2. recprocalspaceship has a `from_structurefactor` method, but it is occasionally
    #    mangling indices for me when the input is a numpy array, as of 16 OCT 24
    #
    # hopefully we can resolve these and reuse code! - @tjlane

    if index is None:
        # the original also tested `hasattr(..., "index")`; a DataSeries is a pandas Series
        # subclass (see `_constructor_sliced` usage), so that test was redundant
        if isinstance(complex_structurefactor, rs.DataSeries):
            index = complex_structurefactor.index
        else:
            msg = "if `complex_structurefactor` is not a `DataSeries` with an `index` attribute"
            msg += ", an index must be provided"
            raise ValueError(msg)
    elif index.shape != complex_structurefactor.shape:
        msg = f"`complex_structurefactor` {complex_structurefactor.shape} does not have same "
        msg += f"shape as `index` {index.shape}"
        raise ShapeMismatchError(msg)

    amplitudes = rs.DataSeries(
        np.abs(complex_structurefactor),
        index=index,
        dtype=rs.StructureFactorAmplitudeDtype(),
        name="F",
    )
    phases = rs.DataSeries(
        np.angle(complex_structurefactor, deg=True),
        index=index,
        dtype=rs.PhaseDtype(),
        name="PHI",
    )

    dataset = rs.DataSet(
        rs.concat([amplitudes, phases], axis=1),
        index=index,
        cell=cell,
        spacegroup=spacegroup,
    )
    return cls(dataset)
def read_mtz_file(file_path: str | Path,
*,
amplitude_column: str = 'F',
phase_column: str = 'PHI',
uncertainty_column: str | None = 'SIGF') ‑> Map
Instance variables
prop amplitude_column_name : str
-
Expand source code
@property
def amplitude_column_name(self) -> str:
    """Name of the column holding the amplitudes."""
    column_name = self._amplitude_column
    return column_name
prop amplitudes : rs.DataSeries
-
Expand source code
@property
def amplitudes(self) -> rs.DataSeries:
    """The amplitude column, looked up by its configured name."""
    column_name = self._amplitude_column
    return self[column_name]
prop has_uncertainties : bool
-
Expand source code
@property
def has_uncertainties(self) -> bool:
    """True if the configured uncertainty column name is among the current columns."""
    present_columns = self.columns
    return self._uncertainty_column in present_columns
prop phase_column_name : str
-
Expand source code
@property
def phase_column_name(self) -> str:
    """Name of the column holding the phases."""
    column_name = self._phase_column
    return column_name
prop phases : rs.DataSeries
-
Expand source code
@property
def phases(self) -> rs.DataSeries:
    """The phase column, looked up by its configured name."""
    column_name = self._phase_column
    return self[column_name]
prop resolution_limits : tuple[float, float]
-
Expand source code
@property
def resolution_limits(self) -> tuple[float, float]:
    """Return `(largest, smallest)` d-spacing among the values from `compute_dHKL`."""
    spacings = self.compute_dHKL()
    largest_spacing = np.max(spacings)
    smallest_spacing = np.min(spacings)
    return largest_spacing, smallest_spacing
prop uncertainties : rs.DataSeries
-
Expand source code
@property
def uncertainties(self) -> rs.DataSeries:
    """
    The uncertainty column.

    Raises
    ------
    AttributeError
        If `has_uncertainties` is falsy.
    """
    if not self.has_uncertainties:
        msg = "uncertainties not set for Map object"
        raise AttributeError(msg)
    return self[self._uncertainty_column]
prop uncertainties_column_name : str
-
Expand source code
@property
def uncertainties_column_name(self) -> str:
    """
    Name of the uncertainty column.

    Raises
    ------
    AttributeError
        If `has_uncertainties` is falsy.
    RuntimeError
        If uncertainties are present but the stored column name is not a `str`.
    """
    if not self.has_uncertainties:
        msg = "uncertainties not set for Map object"
        raise AttributeError(msg)

    column_name = self._uncertainty_column
    if isinstance(column_name, str):
        return column_name

    msg = "misconfigured uncertainty column"
    raise RuntimeError(msg)
Methods
def canonicalize_amplitudes(self) ‑> None
-
Expand source code
def canonicalize_amplitudes(self) -> None:
    """Run the module-level `canonicalize_amplitudes` on this object with `inplace=True`."""
    column_names = {
        "amplitude_column": self._amplitude_column,
        "phase_column": self._phase_column,
    }
    canonicalize_amplitudes(self, **column_names, inplace=True)
def compute_dHKL(self) ‑> reciprocalspaceship.dataseries.DataSeries
-
Expand source code
def compute_dHKL(self) -> rs.DataSeries:  # noqa: N802, caps from reciprocalspaceship
    """
    Return the d-spacings for this object's Miller indices as a new `rs.DataSeries`.

    The values come from `self.cell.calculate_d_array(self.get_hkls())` and share this
    object's index; no column is added to `self`.

    Raises
    ------
    AttributeError
        If the object has no `cell` attribute.
    """
    # rs adds a "dHKL" column to the DataFrame
    # that could be enabled by adding "dHKL" to _allowed_columns - @tjlane
    cell_is_available = hasattr(self, "cell")
    if not cell_is_available:
        msg = "no `cell` attribute set, cannot compute resolution (d-values)"
        raise AttributeError(msg)

    miller_indices = self.get_hkls()
    spacings = self.cell.calculate_d_array(miller_indices)
    return rs.DataSeries(spacings, dtype="R", index=self.index)
Compute the real space lattice plane spacing, d, associated with the HKL indices in the object.
Parameters
inplace
:bool
- Whether to add the column in place or return a copy
def drop(self, labels: Any, *, inplace: bool = False) ‑> None | Map
-
Expand source code
def drop(self, labels: Any, *, inplace: bool = False) -> None | Map:
    """
    Drop `labels` along the index axis only.

    The parent `drop` is always called with `axis="index"` and `columns=None`.

    Returns
    -------
    None | Map
        Whatever the parent `drop` returns for the given `inplace` flag.
    """
    result = super().drop(
        labels=labels,
        axis="index",
        columns=None,
        inplace=inplace,
    )
    return result
Drop specified labels from rows or columns.
Remove rows or columns by specifying label names and corresponding axis, or by directly specifying index or column names. When using a multi-index, labels on different levels can be removed by specifying the level. See the :ref:
user guide <advanced.shown_levels>
for more information about the now unused levels.Parameters
labels
:single label
orlist-like
- Index or column labels to drop. A tuple will be used as a single label and not treated as a list-like.
axis
:{0
or'index', 1
or'columns'}
, default0
- Whether to drop labels from the index (0 or 'index') or columns (1 or 'columns').
index
:single label
orlist-like
- Alternative to specifying axis (
labels, axis=0
is equivalent toindex=labels
). columns
:single label
orlist-like
- Alternative to specifying axis (
labels, axis=1
is equivalent tocolumns=labels
). level
:int
orlevel name
, optional- For MultiIndex, level from which the labels will be removed.
inplace
:bool
, defaultFalse
- If False, return a copy. Otherwise, do operation in place and return None.
errors
:{'ignore', 'raise'}
, default'raise'
- If 'ignore', suppress error and only existing labels are dropped.
Returns
DataFrame
orNone
- Returns DataFrame or None DataFrame with the specified index or column labels removed or None if inplace=True.
Raises
KeyError
- If any of the labels is not found in the selected axis.
See Also
DataFrame.loc
- Label-location based indexer for selection by label.
DataFrame.dropna
- Return DataFrame with labels on given axis omitted where (all or any) data are missing.
DataFrame.drop_duplicates
- Return DataFrame with duplicate rows removed, optionally only considering certain columns.
Series.drop
- Return Series with specified index labels removed.
Examples
>>> df = pd.DataFrame(np.arange(12).reshape(3, 4), ... columns=['A', 'B', 'C', 'D']) >>> df A B C D 0 0 1 2 3 1 4 5 6 7 2 8 9 10 11
Drop columns
>>> df.drop(['B', 'C'], axis=1) A D 0 0 3 1 4 7 2 8 11
>>> df.drop(columns=['B', 'C']) A D 0 0 3 1 4 7 2 8 11
Drop a row by index
>>> df.drop([0, 1]) A B C D 2 8 9 10 11
Drop columns and/or rows of MultiIndex DataFrame
>>> midx = pd.MultiIndex(levels=[['llama', 'cow', 'falcon'], ... ['speed', 'weight', 'length']], ... codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], ... [0, 1, 2, 0, 1, 2, 0, 1, 2]]) >>> df = pd.DataFrame(index=midx, columns=['big', 'small'], ... data=[[45, 30], [200, 100], [1.5, 1], [30, 20], ... [250, 150], [1.5, 0.8], [320, 250], ... [1, 0.8], [0.3, 0.2]]) >>> df big small llama speed 45.0 30.0 weight 200.0 100.0 length 1.5 1.0 cow speed 30.0 20.0 weight 250.0 150.0 length 1.5 0.8 falcon speed 320.0 250.0 weight 1.0 0.8 length 0.3 0.2
Drop a specific index combination from the MultiIndex DataFrame, i.e., drop the combination
'falcon'
and'weight'
, which deletes only the corresponding row>>> df.drop(index=('falcon', 'weight')) big small llama speed 45.0 30.0 weight 200.0 100.0 length 1.5 1.0 cow speed 30.0 20.0 weight 250.0 150.0 length 1.5 0.8 falcon speed 320.0 250.0 length 0.3 0.2
>>> df.drop(index='cow', columns='small') big llama speed 45.0 weight 200.0 length 1.5 falcon speed 320.0 weight 1.0 length 0.3
>>> df.drop(index='length', level=1) big small llama speed 45.0 30.0 weight 200.0 100.0 cow speed 30.0 20.0 weight 250.0 150.0 falcon speed 320.0 250.0 weight 1.0 0.8
def get_hkls(self) ‑> numpy.ndarray
-
Expand source code
def get_hkls(self) -> np.ndarray:
    """
    Return the Miller indices as an int32 array whose last dimension has length 3.

    The indices are taken from the index when its names are `H`, `K`, `L`, and otherwise
    from columns with those names.

    Raises
    ------
    ValueError
        If `H`, `K`, `L` are found neither in the index nor in the columns.
    RuntimeError
        If the resulting array's last dimension is not `NUMBER_OF_DIMENSIONS_IN_UNIVERSE`.
    """
    # overwrite rs implt'n, return w/o modifying self -> same behavior, under testing - @tjlane
    # this is a rather horrible thing to do and we should fix it
    # best is to push changes upstream
    miller_names = ["H", "K", "L"]

    if self.index.names == miller_names:
        miller_array = self.index.to_frame().to_numpy(dtype=np.int32)
    elif all(name in self.columns for name in miller_names):
        # we need to pull out each column as a separate DataSeries so that we don't try to
        # create a new Map object without F, PHI
        per_axis = [self[name].to_numpy(dtype=np.int32) for name in miller_names]
        miller_array = np.vstack(per_axis).T
    else:
        msg = f"cannot find `H`, `K`, and `L` columns in index or columns {self.columns}"
        raise ValueError(msg)

    last_dimension = miller_array.shape[-1]
    if last_dimension != NUMBER_OF_DIMENSIONS_IN_UNIVERSE:
        msg = f"something went wrong, HKL array has a funny shape: {miller_array.shape}"
        raise RuntimeError(msg)

    return miller_array
Get the Miller indices in the DataSet as a ndarray.
Returns
hkl
:ndarray, shape=(n_reflections, 3)
- Miller indices in DataSet
def insert(self, loc: int, column: str, value: Any, *, allow_duplicates: bool = False) ‑> None
-
Expand source code
def insert(self, loc: int, column: str, value: Any, *, allow_duplicates: bool = False) -> None:
    """
    Insert a column, but only if its name is one of `_allowed_columns`.

    Parameters
    ----------
    loc: int
        Insertion position, forwarded to the parent `insert`.
    column: str
        Name of the column to insert.
    value: Any
        Column content, forwarded to the parent `insert`.
    allow_duplicates: bool
        Forwarded to the parent `insert`.

    Raises
    ------
    MapMutabilityError
        If `column` is not in `_allowed_columns`.
    """
    if column not in self._allowed_columns:
        # the "; " after the first clause was previously missing, which fused the message
        # into "...Map objectsspecial columns allowed..."
        msg = "general column assignment not allowed for Map objects; "
        msg += f"special columns allowed: {self._allowed_columns}; "
        msg += "see also Map.set_uncertainties(...)"
        raise MapMutabilityError(msg)

    super().insert(loc, column, value, allow_duplicates=allow_duplicates)
Insert column into DataFrame at specified location.
Raises a ValueError if
column
is already contained in the DataFrame, unlessallow_duplicates
is set to True.Parameters
loc
:int
- Insertion index. Must verify 0 <= loc <= len(columns).
column
:str, number,
orhashable object
- Label of the inserted column.
value
:Scalar, Series,
orarray-like
- Content of the inserted column.
allow_duplicates
:bool
, optional, defaultlib.no_default
- Allow duplicate column labels to be created.
See Also
Index.insert
- Insert new item by index.
Examples
>>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) >>> df col1 col2 0 1 3 1 2 4 >>> df.insert(1, "newcol", [99, 99]) >>> df col1 newcol col2 0 1 99 3 1 2 99 4 >>> df.insert(0, "col1", [100, 100], allow_duplicates=True) >>> df col1 col1 newcol col2 0 100 1 99 3 1 100 2 99 4
Notice that pandas uses index alignment in case of
value
from typeSeries
:>>> df.insert(0, "col0", pd.Series([5, 6], index=[1, 2])) >>> df col0 col1 col1 newcol col2 0 NaN 100 1 99 3 1 5.0 100 2 99 4
def set_uncertainties(self, values: rs.DataSeries, column_name: str = 'SIGF') ‑> None
-
Expand source code
def set_uncertainties(self, values: rs.DataSeries, column_name: str = "SIGF") -> None:
    """
    Set the uncertainties, creating the uncertainty column if it does not exist yet.

    Parameters
    ----------
    values: rs.DataSeries
        The uncertainty values; passed through `_verify_uncertainty_type` first.
    column_name: str
        Name for a newly created column. Not used when an uncertainty column already
        exists, in which case the existing column is overwritten.

    Raises
    ------
    RuntimeError
        If a new column must be created but the object does not have exactly two columns.
    """
    values = self._verify_uncertainty_type(values)

    if self.has_uncertainties:
        self.uncertainties = values
        return

    # otherwise, create a new column; validate BEFORE touching any state
    number_of_columns = len(self.columns)
    number_of_columns_with_just_amplitudes_and_phases = 2
    if number_of_columns != number_of_columns_with_just_amplitudes_and_phases:
        msg = "Misconfigured columns"
        raise RuntimeError(msg)

    super().insert(number_of_columns, column_name, values, allow_duplicates=False)

    # record the new column name only once the insert has succeeded, so a failure above
    # cannot leave `_uncertainty_column` pointing at a column that was never created
    self._uncertainty_column = column_name
def to_ccp4_map(self, *, map_sampling: int) ‑> gemmi.Ccp4Map
-
Expand source code
def to_ccp4_map(self, *, map_sampling: int) -> gemmi.Ccp4Map:
    """
    Transform the amplitude/phase columns into a `gemmi.Ccp4Map`.

    Parameters
    ----------
    map_sampling: int
        Passed to gemmi's `transform_f_phi_to_map` as `sample_rate`.
    """
    mtz = self.to_gemmi()
    realspace_grid = mtz.transform_f_phi_to_map(
        self._amplitude_column,
        self._phase_column,
        sample_rate=map_sampling,
    )

    output_map = gemmi.Ccp4Map()
    output_map.grid = realspace_grid
    output_map.update_ccp4_header()
    return output_map
def to_structurefactor(self) ‑> reciprocalspaceship.dataseries.DataSeries
-
Expand source code
def to_structurefactor(self) -> rs.DataSeries:
    """Return a DataSeries of complex structure factor amplitudes"""
    amplitude_name = self._amplitude_column
    phase_name = self._phase_column
    return super().to_structurefactor(amplitude_name, phase_name)
Return a DataSeries of complex structure factor amplitudes
def write_mtz(self, file_path: str | Path) ‑> None
-
Expand source code
def write_mtz(self, file_path: str | Path) -> None:
    """Write the map to an MTZ file; `file_path` is converted to `str` before delegating."""
    super().write_mtz(str(file_path))
Write DataSet to MTZ file.
If
DataSet.merged == False
, the reflections will be mapped to the reciprocal space ASU, and a M/ISYM column will be constructed.If boolean flags with the label
PARTIAL
orCENTRIC
are found in the DataSet, these will be cast to theMTZInt
dtype, and included in the output MTZ file.Parameters
mtzfile
:str
orfile
- name of an mtz file or a file object
skip_problem_mtztypes
:bool
- Whether to skip columns in DataSet that do not have specified MTZ datatypes
project_name
:str
- Project name to assign to MTZ file
crystal_name
:str
- Crystal name to assign to MTZ file
dataset_name
:str
- Dataset name to assign to MTZ file
class MapMutabilityError (*args, **kwargs)
-
Expand source code
class MapMutabilityError(RuntimeError):
    """Raised when a column assignment or insertion is not permitted on a `Map`."""
Unspecified run-time error.
Ancestors
- builtins.RuntimeError
- builtins.Exception
- builtins.BaseException
class MissingUncertaintiesError (*args, **kwargs)
-
Expand source code
class MissingUncertaintiesError(AttributeError):
    """Raised when a `Map` is required to have an uncertainty column but has none."""
Attribute not found.
Ancestors
- builtins.AttributeError
- builtins.Exception
- builtins.BaseException