Module meteor.rsmap

Map class definition and related functions

Functions

def assert_is_map(obj: Any, *, require_uncertainties: bool) ‑> None
Expand source code
def assert_is_map(obj: Any, *, require_uncertainties: bool) -> None:
    """
    Validate that `obj` is a `Map`, optionally also requiring an uncertainty column.

    Parameters
    ----------
    obj: Any
        The object to check
    require_uncertainties: bool
        If True, `obj.has_uncertainties` must also be truthy

    Raises
    ------
    TypeError
        If `obj` is not an instance of `Map`
    MissingUncertaintiesError
        If uncertainties are required but `obj.has_uncertainties` is falsy
    """
    if isinstance(obj, Map):
        # uncertainties are only inspected when the caller asks for them
        if not require_uncertainties or obj.has_uncertainties:
            return
        raise MissingUncertaintiesError(f"{obj} Map missing required uncertainty column")
    raise TypeError(f"expected {obj} to be a rsmap.Map, got {type(obj)}")

Classes

class Map (data: dict | pd.DataFrame | rs.DataSet,
*,
amplitude_column: str = 'F',
phase_column: str = 'PHI',
uncertainty_column: str | None = 'SIGF',
**kwargs: Any)
Expand source code
class Map(rs.DataSet):
    """
    A high-level interface for a crystallographic map of any kind.

    Specifically, this class is based on a reciprocalspaceship `DataSet` (ie. a
    crystallographically-aware pandas `DataFrame`), but is restricted to three and only three
    special columns corresponding to:

        - (real) `amplitudes`
        - `phases`
        - `uncertainties`, ie the standard deviation for a Gaussian around the amplitude mean

    In addition, the class maintains an `index` of Miller indices, as well as the crystallographic
    metadata supported by `rs.DataSet`, most notably a `cell` and `spacegroup`.

    These structured data enable this class to perform some routine map-based calculations, such as

        - computing a real-space map, or computing map coefficients from a map
        - converting between complex Cartesian and polar (amplitude/phase) structure factors
        - reading and writing mtz and ccp4 map files

    all in a way that automatically facilitates the bookkeeping tasks normally associated with
    these relatively simple operations.
    """

    # these columns are always allowed
    _allowed_columns: ClassVar[list[str]] = ["H", "K", "L"]

    # in addition, __init__ specifies 3 special columns that can be named dynamically to support:
    # amplitudes, phases, uncertainties; all other columns are forbidden

    @cellify
    @spacegroupify
    def __init__(
        self,
        data: dict | pd.DataFrame | rs.DataSet,
        *,
        amplitude_column: str = "F",
        phase_column: str = "PHI",
        uncertainty_column: str | None = "SIGF",
        **kwargs: Any,
    ) -> None:
        """
        Build a `Map` from tabular data, keeping only the allowed and special columns.

        Parameters
        ----------
        data: dict | pd.DataFrame | rs.DataSet
            Input data, forwarded to the `rs.DataSet` constructor
        amplitude_column: str
            Name of the column in `data` holding amplitudes (required to exist)
        phase_column: str
            Name of the column in `data` holding phases (required to exist)
        uncertainty_column: str | None
            Name of the column holding uncertainties; kept only if present in `data`
        **kwargs: Any
            Forwarded to the `rs.DataSet` constructor

        Raises
        ------
        KeyError
            If the amplitude or phase column is missing from `data`
        """
        super().__init__(data=data, **kwargs)

        self._amplitude_column = amplitude_column
        self._phase_column = phase_column
        self._uncertainty_column = uncertainty_column

        for column in [self._amplitude_column, self._phase_column]:
            if column not in self.columns:
                msg = "amplitude and phase columns must be in input `data`... "
                msg += f"looking for `{column}`, found `{self.columns}`"
                raise KeyError(msg)

        columns_to_keep = [*self._allowed_columns, amplitude_column, phase_column]
        if uncertainty_column and (uncertainty_column in self.columns):
            columns_to_keep.append(uncertainty_column)

        # this feels dangerous, but I cannot find a better way | author: @tjlane
        excess_columns = set(self.columns) - set(columns_to_keep)
        for column in excess_columns:
            del self[column]

        # ensure types correct
        self.amplitudes = self._verify_amplitude_type(self.amplitudes, fix=True)
        self.phases = self._verify_phase_type(self.phases, fix=True)
        if self.has_uncertainties:
            self.uncertainties = self._verify_uncertainty_type(self.uncertainties, fix=True)

    @property
    def _constructor(self) -> Callable[[Any], Map]:
        """Return a factory that builds a new `Map` using this instance's column names."""

        def constructor_fxn(*args: Any, **kwargs: Any) -> Map:
            return Map(
                *args,
                amplitude_column=self._amplitude_column,
                phase_column=self._phase_column,
                uncertainty_column=self._uncertainty_column,
                **kwargs,
            )

        return constructor_fxn

    @property
    def _constructor_sliced(self) -> Callable[[Any], rs.DataSeries]:
        """Return the type used for a single column taken from this object: `rs.DataSeries`."""
        return rs.DataSeries

    def _verify_type(
        self,
        name: str,
        allowed_types: list[type],
        dataseries: rs.DataSeries,
        *,
        fix: bool,
        cast_fix_to: type,
    ) -> rs.DataSeries:
        """
        Check that `dataseries.dtype` is one of `allowed_types`.

        Returns `dataseries` unchanged if the dtype is allowed. Otherwise the series is cast to
        `cast_fix_to` when `fix` is True, and an `AssertionError` naming `name` is raised when
        `fix` is False.
        """
        if dataseries.dtype not in allowed_types:
            if fix:
                return dataseries.astype(cast_fix_to)
            msg = f"dtype for passed {name} not allowed, got: {dataseries.dtype} allow {allowed_types}"
            raise AssertionError(msg)
        return dataseries

    def _verify_amplitude_type(
        self,
        dataseries: rs.DataSeries,
        *,
        fix: bool = True,
    ) -> rs.DataSeries:
        """Verify an amplitude dtype; on mismatch cast to `StructureFactorAmplitudeDtype`."""
        name = "amplitude"
        amplitude_dtypes = [
            rs.StructureFactorAmplitudeDtype(),
            rs.FriedelStructureFactorAmplitudeDtype(),
            rs.NormalizedStructureFactorAmplitudeDtype(),
            rs.AnomalousDifferenceDtype(),
        ]
        return self._verify_type(
            name,
            amplitude_dtypes,
            dataseries,
            fix=fix,
            cast_fix_to=rs.StructureFactorAmplitudeDtype(),
        )

    def _verify_phase_type(self, dataseries: rs.DataSeries, *, fix: bool = True) -> rs.DataSeries:
        """Verify a phase dtype; on mismatch cast to `PhaseDtype` (if `fix`) or raise."""
        name = "phase"
        phase_dtypes = [rs.PhaseDtype()]
        return self._verify_type(
            name, phase_dtypes, dataseries, fix=fix, cast_fix_to=rs.PhaseDtype()
        )

    def _verify_uncertainty_type(
        self,
        dataseries: rs.DataSeries,
        *,
        fix: bool = True,
    ) -> rs.DataSeries:
        """Verify an uncertainty dtype; on mismatch cast to `StandardDeviationDtype`."""
        name = "uncertainties"
        uncertainty_dtypes = [
            rs.StandardDeviationDtype(),
            rs.StandardDeviationFriedelIDtype(),
            rs.StandardDeviationFriedelSFDtype(),
        ]
        return self._verify_type(
            name, uncertainty_dtypes, dataseries, fix=fix, cast_fix_to=rs.StandardDeviationDtype()
        )

    def __setitem__(self, key: str, value: Any) -> None:
        """
        Assign to a column, but only one that already exists or is in `_allowed_columns`.

        Raises
        ------
        MapMutabilityError
            If `key` is neither an existing column nor an always-allowed column
        """
        allowed = list(self.columns) + self._allowed_columns
        if key not in allowed:
            msg = "column assignment not allowed for Map objects"
            raise MapMutabilityError(msg)
        super().__setitem__(key, value)

    def insert(self, loc: int, column: str, value: Any, *, allow_duplicates: bool = False) -> None:
        """
        Insert a column, restricted to the names in `_allowed_columns`.

        Raises
        ------
        MapMutabilityError
            If `column` is not one of the always-allowed columns
        """
        if column in self._allowed_columns:
            super().insert(loc, column, value, allow_duplicates=allow_duplicates)
        else:
            # each fragment ends with "; " so the concatenated message reads as separate clauses
            msg = "general column assignment not allowed for Map objects; "
            msg += f"special columns allowed: {self._allowed_columns}; "
            msg += "see also Map.set_uncertainties(...)"
            raise MapMutabilityError(msg)

    @overload
    def drop(self, labels: Any, *, inplace: Literal[True]) -> None: ...

    @overload
    def drop(self, labels: Any, *, inplace: Literal[False]) -> Map: ...

    def drop(self, labels: Any, *, inplace: bool = False) -> None | Map:
        """Drop rows by index label; the axis is pinned to the index, never the columns."""
        return super().drop(labels=labels, axis="index", columns=None, inplace=inplace)

    def get_hkls(self) -> np.ndarray:
        """
        Return the Miller indices as an int32 array whose last dimension has length 3.

        The indices are read from the index if it is named `H`, `K`, `L`, otherwise from columns
        of those names.

        Raises
        ------
        ValueError
            If `H`, `K`, `L` are found neither in the index nor in the columns
        RuntimeError
            If the assembled array does not have a last dimension of the expected length
        """
        # overwrite rs implt'n, return w/o modifying self -> same behavior, under testing - @tjlane
        # this is a rather horrible thing to do and we should fix it
        # best is to push changes upstream
        hkl_names = ["H", "K", "L"]
        if self.index.names == hkl_names:
            hkls = self.index.to_frame().to_numpy(dtype=np.int32)
        elif all(col in self.columns for col in hkl_names):
            # we need to pull out each column as a separate DataSeries so that we don't try to
            # create a new Map object without F, PHI
            hkls = np.vstack([self[col].to_numpy(dtype=np.int32) for col in hkl_names]).T
        else:
            msg = f"cannot find `H`, `K`, and `L` columns in index or columns {self.columns}"
            raise ValueError(msg)

        if hkls.shape[-1] != NUMBER_OF_DIMENSIONS_IN_UNIVERSE:
            msg = f"something went wrong, HKL array has a funny shape: {hkls.shape}"
            raise RuntimeError(msg)

        return hkls

    def compute_dHKL(self) -> rs.DataSeries:  # noqa: N802, caps from reciprocalspaceship
        """
        Compute the d-spacing for every reflection, without adding a column to `self`.

        Returns
        -------
        d_hkl: rs.DataSeries
            d-values (dtype "R"), sharing this object's index

        Raises
        ------
        AttributeError
            If no `cell` is available (attribute missing or set to None)
        """
        # rs adds a "dHKL" column to the DataFrame
        # that could be enabled by adding "dHKL" to _allowed_columns - @tjlane
        # a `cell` attribute that exists but is None must fail with the same clear message
        if getattr(self, "cell", None) is None:
            msg = "no `cell` attribute set, cannot compute resolution (d-values)"
            raise AttributeError(msg)
        d_hkl = self.cell.calculate_d_array(self.get_hkls())
        return rs.DataSeries(d_hkl, dtype="R", index=self.index)

    @property
    def resolution_limits(self) -> tuple[float, float]:
        """The (maximum, minimum) d-spacing over all reflections, in that order."""
        d_hkl = self.compute_dHKL()
        return np.max(d_hkl), np.min(d_hkl)

    @property
    def amplitudes(self) -> rs.DataSeries:
        """The amplitude column."""
        return self[self._amplitude_column]

    @amplitudes.setter
    def amplitudes(self, values: rs.DataSeries) -> None:
        # values with a non-amplitude dtype are cast rather than rejected (fix defaults to True)
        values = self._verify_amplitude_type(values)
        self[self._amplitude_column] = values

    @property
    def amplitude_column_name(self) -> str:
        """The name of the amplitude column."""
        return self._amplitude_column

    @property
    def phases(self) -> rs.DataSeries:
        """The phase column."""
        return self[self._phase_column]

    @phases.setter
    def phases(self, values: rs.DataSeries) -> None:
        # values with a non-phase dtype are cast rather than rejected (fix defaults to True)
        values = self._verify_phase_type(values)
        self[self._phase_column] = values

    @property
    def phase_column_name(self) -> str:
        """The name of the phase column."""
        return self._phase_column

    @property
    def has_uncertainties(self) -> bool:
        """Whether the configured uncertainty column is currently present in the columns."""
        return self._uncertainty_column in self.columns

    @property
    def uncertainties(self) -> rs.DataSeries:
        """The uncertainty column; raises `AttributeError` if it is not present."""
        if self.has_uncertainties:
            return self[self._uncertainty_column]
        msg = "uncertainties not set for Map object"
        raise AttributeError(msg)

    @uncertainties.setter
    def uncertainties(self, values: rs.DataSeries) -> None:
        # only an existing uncertainty column can be overwritten here; creating the column
        # is the job of `set_uncertainties`
        if self.has_uncertainties:
            values = self._verify_uncertainty_type(values)
            self[self._uncertainty_column] = values  # type: ignore[index]
        else:
            msg = "uncertainties unset, and Pandas forbids assignment via attributes; "
            msg += "to initialize, use Map.set_uncertainties(...)"
            raise AttributeError(msg)

    def set_uncertainties(self, values: rs.DataSeries, column_name: str = "SIGF") -> None:
        """
        Set the uncertainties, creating the uncertainty column if it does not yet exist.

        Parameters
        ----------
        values: rs.DataSeries
            The uncertainty values; cast to a standard deviation dtype if needed
        column_name: str
            Name for a newly created column; ignored when an uncertainty column already exists

        Raises
        ------
        RuntimeError
            If a new column is needed but the object does not hold exactly two columns
        """
        values = self._verify_uncertainty_type(values)

        if self.has_uncertainties:
            self.uncertainties = values
            return

        # otherwise, create a new column; validate and insert first so that a failure
        # leaves `_uncertainty_column` untouched
        number_of_columns = len(self.columns)
        number_of_columns_with_just_amplitudes_and_phases = 2
        if number_of_columns != number_of_columns_with_just_amplitudes_and_phases:
            msg = "Misconfigured columns"
            raise RuntimeError(msg)
        # bypass Map.insert, which only permits the always-allowed columns
        super().insert(number_of_columns, column_name, values, allow_duplicates=False)
        self._uncertainty_column = column_name

    @property
    def uncertainties_column_name(self) -> str:
        """The name of the uncertainty column; raises `AttributeError` if it is not present."""
        if self.has_uncertainties:
            if not isinstance(self._uncertainty_column, str):
                msg = "misconfigured uncertainty column"
                raise RuntimeError(msg)
            return self._uncertainty_column
        msg = "uncertainties not set for Map object"
        raise AttributeError(msg)

    def canonicalize_amplitudes(self) -> None:
        """Apply the module-level `canonicalize_amplitudes` function to this map, in place."""
        canonicalize_amplitudes(
            self,
            amplitude_column=self._amplitude_column,
            phase_column=self._phase_column,
            inplace=True,
        )

    def to_structurefactor(self) -> rs.DataSeries:
        """Return a DataSeries of complex structure factor amplitudes"""
        return super().to_structurefactor(self._amplitude_column, self._phase_column)

    @overload
    @classmethod
    def from_structurefactor(
        cls,
        complex_structurefactor: rs.DataSeries,
        *,
        index: None = None,
        cell: CellType | None = None,
        spacegroup: SpacegroupType | None = None,
    ) -> Map: ...

    @overload
    @classmethod
    def from_structurefactor(
        cls,
        complex_structurefactor: np.ndarray,
        *,
        index: pd.Index,
        cell: CellType | None = None,
        spacegroup: SpacegroupType | None = None,
    ) -> Map: ...

    @classmethod
    @cellify("cell")
    @spacegroupify("spacegroup")
    def from_structurefactor(
        cls,
        complex_structurefactor: np.ndarray | rs.DataSeries,
        *,
        index: pd.Index | None = None,
        cell: CellType | None = None,
        spacegroup: SpacegroupType | None = None,
    ) -> Map:
        """
        Create a `Map` from complex structure factors, with columns named `F` and `PHI`.

        Parameters
        ----------
        complex_structurefactor: np.ndarray | rs.DataSeries
            The complex values; amplitudes are their modulus, phases their angle in degrees
        index: pd.Index | None
            Index for the new map; may be omitted only if `complex_structurefactor` is a
            `DataSeries`, in which case its own index is used
        cell: CellType | None
            Unit cell for the new map
        spacegroup: SpacegroupType | None
            Spacegroup for the new map

        Raises
        ------
        ValueError
            If no `index` is given and the input is not a `DataSeries`
        ShapeMismatchError
            If `index` is given and its shape differs from that of the input
        """
        # 1. `rs.DataSet.from_structurefactor` exists, but it operates on a column that's already
        #    part of the dataset; having such a (redundant) column is forbidden by `Map`
        # 2. reciprocalspaceship has a `from_structurefactor` method, but it is occasionally
        #    mangling indices for me when the input is a numpy array, as of 16 OCT 24
        #
        # hopefully we can resolve these and reuse code! - @tjlane

        if index is None:
            if isinstance(complex_structurefactor, rs.DataSeries) and hasattr(
                complex_structurefactor, "index"
            ):
                index = complex_structurefactor.index

            else:
                msg = "if `complex_structurefactor` is not a `DataSeries` with an `index` attribute"
                msg += ", an `index` must be provided"
                raise ValueError(msg)

        elif index.shape != complex_structurefactor.shape:
            msg = f"`complex_structurefactor` {complex_structurefactor.shape} does not have same "
            msg += f"shape as `index` {index.shape}"
            raise ShapeMismatchError(msg)

        amplitudes = rs.DataSeries(
            np.abs(complex_structurefactor),
            index=index,
            dtype=rs.StructureFactorAmplitudeDtype(),
            name="F",
        )

        phases = rs.DataSeries(
            np.angle(complex_structurefactor, deg=True),
            index=index,
            dtype=rs.PhaseDtype(),
            name="PHI",
        )

        dataset = rs.DataSet(
            rs.concat([amplitudes, phases], axis=1),
            index=index,
            cell=cell,
            spacegroup=spacegroup,
        )

        return cls(dataset)

    @classmethod
    def from_gemmi(
        cls,
        gemmi_mtz: gemmi.Mtz,
        *,
        amplitude_column: str = "F",
        phase_column: str = "PHI",
        uncertainty_column: str | None = "SIGF",
    ) -> Map:
        """Create a `Map` from a `gemmi.Mtz`, selecting the named columns."""
        return cls(
            rs.DataSet(gemmi_mtz),
            amplitude_column=amplitude_column,
            phase_column=phase_column,
            uncertainty_column=uncertainty_column,
        )

    @classmethod
    @cellify("cell")
    def from_3d_numpy_map(
        cls, map_grid: np.ndarray, *, spacegroup: Any, cell: CellType, high_resolution_limit: float
    ) -> Map:
        """
        Create a `Map` from a 3d grid of voxel values stored in a numpy array.

        Parameters
        ----------
        map_grid: np.ndarray
            The array, laid out in Gemmi format
        spacegroup: Any
            Specifies which spacegroup, can be an int, gemmi.SpaceGroup, ...
        cell
            Specifies cell, can be a tuple, gemmi.Cell, ...
        high_resolution_limit: float
            The resolution of the map, regardless of the sampling; we need this to infer the map
            sampling

        Returns
        -------
        map: Map
            The map coefficients

        Raises
        ------
        ValueError
            If `map_grid` is not a 3D array

        See Also
        --------
        For information about Gemmi data layout: https://gemmi.readthedocs.io/en/latest/grid.html
        """
        if len(map_grid.shape) != NUMBER_OF_DIMENSIONS_IN_UNIVERSE:
            msg = "`map_grid` should be a 3D array representing a realspace map"
            raise ValueError(msg)
        ccp4 = numpy_array_to_map(
            map_grid,
            spacegroup=spacegroup,
            cell=cell,
        )
        return cls.from_ccp4_map(
            ccp4_map=ccp4,
            high_resolution_limit=high_resolution_limit,
        )

    def to_ccp4_map(self, *, map_sampling: int) -> gemmi.Ccp4Map:
        """
        Compute a real-space CCP4 map from this object's amplitudes and phases.

        `map_sampling` is passed to gemmi's `transform_f_phi_to_map` as `sample_rate`.
        """
        map_coefficients_gemmi_format = self.to_gemmi()
        ccp4_map = gemmi.Ccp4Map()
        ccp4_map.grid = map_coefficients_gemmi_format.transform_f_phi_to_map(
            self._amplitude_column,
            self._phase_column,
            sample_rate=map_sampling,
        )
        ccp4_map.update_ccp4_header()
        return ccp4_map

    @classmethod
    def from_ccp4_map(
        cls,
        ccp4_map: gemmi.Ccp4Map,
        *,
        high_resolution_limit: float,
        amplitude_column: str = "F",
        phase_column: str = "PHI",
    ) -> Map:
        """
        Compute map coefficients from a real-space CCP4 map.

        Parameters
        ----------
        ccp4_map: gemmi.Ccp4Map
            The real-space map; its `grid` is transformed to structure factors
        high_resolution_limit: float
            Resolution cutoff for the returned reflections (a small buffer is subtracted)
        amplitude_column: str
            Name to give the amplitude column
        phase_column: str
            Name to give the phase column

        Returns
        -------
        map: Map
            The map coefficients, without uncertainties
        """
        gemmi_structure_factors = gemmi.transform_map_to_f_phi(ccp4_map.grid, half_l=False)
        # to ensure we include the final shell of reflections, add a small buffer to the resolution
        data = gemmi_structure_factors.prepare_asu_data(
            dmin=high_resolution_limit - GEMMI_HIGH_RESOLUTION_BUFFER,
            with_sys_abs=True,
        )

        mtz = gemmi.Mtz(with_base=True)
        mtz.spacegroup = gemmi_structure_factors.spacegroup
        mtz.set_cell_for_all(gemmi_structure_factors.unit_cell)
        mtz.add_dataset("FromMap")

        mtz.add_column(amplitude_column, "F")
        mtz.add_column(phase_column, "P")

        mtz.set_data(data)
        mtz.switch_to_asu_hkl()

        # build a plain `rs.DataSet`, exactly as `from_gemmi` does. `super().from_gemmi(mtz)`
        # dispatches on `cls`, so it appears to construct a `Map` with the *default* column
        # names and would fail for custom `amplitude_column` / `phase_column`.
        # NOTE(review): confirm against reciprocalspaceship's `DataSet.from_gemmi`
        dataset = rs.DataSet(mtz)

        return cls(dataset, amplitude_column=amplitude_column, phase_column=phase_column)

    def write_mtz(self, file_path: str | Path) -> None:
        """Write this map to an MTZ file; `file_path` is converted to `str` first."""
        path_cast_to_str = str(file_path)
        super().write_mtz(path_cast_to_str)

    @classmethod
    def read_mtz_file(
        cls,
        file_path: str | Path,
        *,
        amplitude_column: str = "F",
        phase_column: str = "PHI",
        uncertainty_column: str | None = "SIGF",
    ) -> Map:
        """Read an MTZ file with gemmi and build a `Map` from the named columns."""
        gemmi_mtz = gemmi.read_mtz_file(str(file_path))
        return cls.from_gemmi(
            gemmi_mtz,
            amplitude_column=amplitude_column,
            phase_column=phase_column,
            uncertainty_column=uncertainty_column,
        )

A high-level interface for a crystallographic map of any kind.

Specifically, this class is based on a reciprocalspaceship DataSet (ie. a crystallographically-aware pandas DataFrame), but is restricted to three and only three special columns corresponding to:

- (real) <code>amplitudes</code>
- <code>phases</code>
- <code>uncertainties</code>, ie the standard deviation for a Gaussian around the amplitude mean

In addition, the class maintains an index of Miller indices, as well as the crystallographic metadata supported by rs.DataSet, most notably a cell and spacegroup.

These structured data enable this class to perform some routine map-based calculations, such as

- computing a real-space map, or computing map coefficients from a map
- converting between complex Cartesian and polar (amplitude/phase) structure factors
- reading and writing mtz and ccp4 map files

all in a way that automatically facilitates the bookkeeping tasks normally associated with these relatively simple operations.

Ancestors

  • reciprocalspaceship.dataset.DataSet
  • pandas.core.frame.DataFrame
  • pandas.core.generic.NDFrame
  • pandas.core.base.PandasObject
  • pandas.core.accessor.DirNamesMixin
  • pandas.core.indexing.IndexingMixin
  • pandas.core.arraylike.OpsMixin

Static methods

def from_3d_numpy_map(cls,
map_grid: np.ndarray,
*,
spacegroup: Any,
cell: CellType,
high_resolution_limit: float) ‑> Map
Expand source code
@classmethod
@cellify("cell")
def from_3d_numpy_map(
    cls, map_grid: np.ndarray, *, spacegroup: Any, cell: CellType, high_resolution_limit: float
) -> Map:
    """
    Build a `Map` of map coefficients from a real-space grid held in a 3d numpy array.

    Parameters
    ----------
    map_grid: np.ndarray
        Voxel values, using the Gemmi grid layout
    spacegroup: Any
        The spacegroup, given as an int, gemmi.SpaceGroup, ...
    cell
        The unit cell, given as a tuple, gemmi.Cell, ...
    high_resolution_limit: float
        The resolution of the map, regardless of the sampling; required to infer the map
        sampling

    Returns
    -------
    map: Map
        The map coefficients

    Raises
    ------
    ValueError
        If `map_grid` is not three-dimensional

    See Also
    --------
    For information about Gemmi data layout: https://gemmi.readthedocs.io/en/latest/grid.html
    """
    grid_rank = len(map_grid.shape)
    if grid_rank != NUMBER_OF_DIMENSIONS_IN_UNIVERSE:
        raise ValueError("`map_grid` should be a 3D array representing a realspace map")

    # wrap the raw array as a CCP4 map, then reuse the CCP4 -> coefficients path
    realspace_map = numpy_array_to_map(map_grid, spacegroup=spacegroup, cell=cell)
    return cls.from_ccp4_map(ccp4_map=realspace_map, high_resolution_limit=high_resolution_limit)

Create a Map from a 3d grid of voxel values stored in a numpy array.

Parameters

map_grid : np.ndarray
The array, laid out in Gemmi format
spacegroup : Any
Specifies which spacegroup, can be an int, gemmi.SpaceGroup, …
cell
Specifies cell, can be a tuple, gemmi.Cell, …
high_resolution_limit : float
The resolution of the map, regardless of the sampling; we need this to infer the map sampling

Returns

map : Map
The map coefficients

See Also

For information about Gemmi data layout: <https://gemmi.readthedocs.io/en/latest/grid.html>

def from_ccp4_map(ccp4_map: gemmi.Ccp4Map,
*,
high_resolution_limit: float,
amplitude_column: str = 'F',
phase_column: str = 'PHI') ‑> Map
def from_gemmi(gemmi_mtz: gemmi.Mtz,
*,
amplitude_column: str = 'F',
phase_column: str = 'PHI',
uncertainty_column: str | None = 'SIGF') ‑> Map

Creates DataSet object from gemmi.Mtz object.

If the gemmi.Mtz object contains an M/ISYM column and contains duplicated Miller indices, an unmerged DataSet will be constructed. The Miller indices will be mapped to their observed values, and a partiality flag will be extracted and stored as a boolean column with the label, PARTIAL. Otherwise, a merged DataSet will be constructed.

If columns are found with the MTZInt dtype and are labeled PARTIAL or CENTRIC, these will be interpreted as boolean flags used to label partial or centric reflections, respectively.

Parameters

gemmiMtz : gemmi.Mtz
 

Returns

DataSet
 
def from_structurefactor(cls,
complex_structurefactor: np.ndarray | rs.DataSeries,
*,
index: pd.Index | None = None,
cell: CellType | None = None,
spacegroup: SpacegroupType | None = None) ‑> Map
Expand source code
@classmethod
@cellify("cell")
@spacegroupify("spacegroup")
def from_structurefactor(
    cls,
    complex_structurefactor: np.ndarray | rs.DataSeries,
    *,
    index: pd.Index | None = None,
    cell: CellType | None = None,
    spacegroup: SpacegroupType | None = None,
) -> Map:
    """
    Create a `Map` from complex structure factors, with columns named `F` and `PHI`.

    Parameters
    ----------
    complex_structurefactor: np.ndarray | rs.DataSeries
        The complex values; amplitudes are their modulus, phases their angle in degrees
    index: pd.Index | None
        Index for the new map; may be omitted only if `complex_structurefactor` is a
        `DataSeries`, in which case its own index is used
    cell: CellType | None
        Unit cell for the new map
    spacegroup: SpacegroupType | None
        Spacegroup for the new map

    Raises
    ------
    ValueError
        If no `index` is given and the input is not a `DataSeries`
    ShapeMismatchError
        If `index` is given and its shape differs from that of the input
    """
    # 1. `rs.DataSet.from_structurefactor` exists, but it operates on a column that's already
    #    part of the dataset; having such a (redundant) column is forbidden by `Map`
    # 2. reciprocalspaceship has a `from_structurefactor` method, but it is occasionally
    #    mangling indices for me when the input is a numpy array, as of 16 OCT 24
    #
    # hopefully we can resolve these and reuse code! - @tjlane

    if index is None:
        if isinstance(complex_structurefactor, rs.DataSeries) and hasattr(
            complex_structurefactor, "index"
        ):
            index = complex_structurefactor.index

        else:
            msg = "if `complex_structurefactor` is not a `DataSeries` with an `index` attribute"
            msg += ", an `index` must be provided"
            raise ValueError(msg)

    elif index.shape != complex_structurefactor.shape:
        msg = f"`complex_structurefactor` {complex_structurefactor.shape} does not have same "
        msg += f"shape as `index` {index.shape}"
        raise ShapeMismatchError(msg)

    amplitudes = rs.DataSeries(
        np.abs(complex_structurefactor),
        index=index,
        dtype=rs.StructureFactorAmplitudeDtype(),
        name="F",
    )

    phases = rs.DataSeries(
        np.angle(complex_structurefactor, deg=True),
        index=index,
        dtype=rs.PhaseDtype(),
        name="PHI",
    )

    dataset = rs.DataSet(
        rs.concat([amplitudes, phases], axis=1),
        index=index,
        cell=cell,
        spacegroup=spacegroup,
    )

    return cls(dataset)
def read_mtz_file(file_path: str | Path,
*,
amplitude_column: str = 'F',
phase_column: str = 'PHI',
uncertainty_column: str | None = 'SIGF') ‑> Map

Instance variables

prop amplitude_column_name : str
Expand source code
@property
def amplitude_column_name(self) -> str:
    """The name of the column that stores the amplitudes."""
    column_name = self._amplitude_column
    return column_name
prop amplitudes : rs.DataSeries
Expand source code
@property
def amplitudes(self) -> rs.DataSeries:
    """The amplitude column, looked up by its configured name."""
    column_name = self._amplitude_column
    return self[column_name]
prop has_uncertainties : bool
Expand source code
@property
def has_uncertainties(self) -> bool:
    """Whether the configured uncertainty column is currently among the columns."""
    column_name = self._uncertainty_column
    return column_name in self.columns
prop phase_column_name : str
Expand source code
@property
def phase_column_name(self) -> str:
    """The name of the column that stores the phases."""
    column_name = self._phase_column
    return column_name
prop phases : rs.DataSeries
Expand source code
@property
def phases(self) -> rs.DataSeries:
    """The phase column, looked up by its configured name."""
    column_name = self._phase_column
    return self[column_name]
prop resolution_limits : tuple[float, float]
Expand source code
@property
def resolution_limits(self) -> tuple[float, float]:
    """The (maximum, minimum) d-spacing over all reflections, in that order."""
    spacings = self.compute_dHKL()
    largest_spacing = np.max(spacings)
    smallest_spacing = np.min(spacings)
    return largest_spacing, smallest_spacing
prop uncertainties : rs.DataSeries
Expand source code
@property
def uncertainties(self) -> rs.DataSeries:
    """
    The uncertainty column.

    Raises
    ------
    AttributeError
        If the object currently has no uncertainty column
    """
    if not self.has_uncertainties:
        raise AttributeError("uncertainties not set for Map object")
    return self[self._uncertainty_column]
prop uncertainties_column_name : str
Expand source code
@property
def uncertainties_column_name(self) -> str:
    """
    The name of the uncertainty column.

    Raises
    ------
    AttributeError
        If the object currently has no uncertainty column
    RuntimeError
        If an uncertainty column is present but its recorded name is not a string
    """
    if not self.has_uncertainties:
        raise AttributeError("uncertainties not set for Map object")
    column_name = self._uncertainty_column
    if isinstance(column_name, str):
        return column_name
    raise RuntimeError("misconfigured uncertainty column")

Methods

def canonicalize_amplitudes(self) ‑> None
Expand source code
def canonicalize_amplitudes(self) -> None:
    """Apply the module-level `canonicalize_amplitudes` function to this map, in place."""
    # tell the helper which columns hold this map's amplitudes and phases
    column_names = {
        "amplitude_column": self._amplitude_column,
        "phase_column": self._phase_column,
    }
    canonicalize_amplitudes(self, **column_names, inplace=True)
def compute_dHKL(self) ‑> reciprocalspaceship.dataseries.DataSeries
Expand source code
def compute_dHKL(self) -> rs.DataSeries:  # noqa: N802, caps from reciprocalspaceship
    """
    Compute the d-spacing for every reflection, without adding a column to `self`.

    Returns
    -------
    d_hkl: rs.DataSeries
        d-values (dtype "R"), sharing this object's index

    Raises
    ------
    AttributeError
        If no `cell` is available (attribute missing or set to None)
    """
    # rs adds a "dHKL" column to the DataFrame
    # that could be enabled by adding "dHKL" to _allowed_columns - @tjlane
    # a `cell` attribute that exists but is None must fail with the same clear message
    if getattr(self, "cell", None) is None:
        msg = "no `cell` attribute set, cannot compute resolution (d-values)"
        raise AttributeError(msg)
    d_hkl = self.cell.calculate_d_array(self.get_hkls())
    return rs.DataSeries(d_hkl, dtype="R", index=self.index)

Compute the real space lattice plane spacing, d, associated with the HKL indices in the object.

Parameters

inplace : bool
Whether to add the column in place or return a copy
def drop(self, labels: Any, *, inplace: bool = False) ‑> None | Map
Expand source code
def drop(self, labels: Any, *, inplace: bool = False) -> None | Map:
    """
    Drop rows by index label.

    The axis is pinned to the index and `columns` to None, so this override never removes
    columns. Returns None when `inplace` is True, otherwise the result of the parent `drop`.
    """
    row_only_options = {"axis": "index", "columns": None, "inplace": inplace}
    return super().drop(labels=labels, **row_only_options)

Drop specified labels from rows or columns.

Remove rows or columns by specifying label names and corresponding axis, or by directly specifying index or column names. When using a multi-index, labels on different levels can be removed by specifying the level. See the :ref:user guide <advanced.shown_levels> for more information about the now unused levels.

Parameters

labels : single label or list-like
Index or column labels to drop. A tuple will be used as a single label and not treated as a list-like.
axis : {0 or 'index', 1 or 'columns'}, default 0
Whether to drop labels from the index (0 or 'index') or columns (1 or 'columns').
index : single label or list-like
Alternative to specifying axis (labels, axis=0 is equivalent to index=labels).
columns : single label or list-like
Alternative to specifying axis (labels, axis=1 is equivalent to columns=labels).
level : int or level name, optional
For MultiIndex, level from which the labels will be removed.
inplace : bool, default False
If False, return a copy. Otherwise, do operation in place and return None.
errors : {'ignore', 'raise'}, default 'raise'
If 'ignore', suppress error and only existing labels are dropped.

Returns

DataFrame or None
Returns DataFrame or None DataFrame with the specified index or column labels removed or None if inplace=True.

Raises

KeyError
If any of the labels is not found in the selected axis.

See Also

DataFrame.loc
Label-location based indexer for selection by label.
DataFrame.dropna
Return DataFrame with labels on given axis omitted where (all or any) data are missing.
DataFrame.drop_duplicates
Return DataFrame with duplicate rows removed, optionally only considering certain columns.
Series.drop
Return Series with specified index labels removed.

Examples

>>> df = pd.DataFrame(np.arange(12).reshape(3, 4),
...                   columns=['A', 'B', 'C', 'D'])
>>> df
   A  B   C   D
0  0  1   2   3
1  4  5   6   7
2  8  9  10  11

Drop columns

>>> df.drop(['B', 'C'], axis=1)
   A   D
0  0   3
1  4   7
2  8  11
>>> df.drop(columns=['B', 'C'])
   A   D
0  0   3
1  4   7
2  8  11

Drop a row by index

>>> df.drop([0, 1])
   A  B   C   D
2  8  9  10  11

Drop columns and/or rows of MultiIndex DataFrame

>>> midx = pd.MultiIndex(levels=[['llama', 'cow', 'falcon'],
...                              ['speed', 'weight', 'length']],
...                      codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2],
...                             [0, 1, 2, 0, 1, 2, 0, 1, 2]])
>>> df = pd.DataFrame(index=midx, columns=['big', 'small'],
...                   data=[[45, 30], [200, 100], [1.5, 1], [30, 20],
...                         [250, 150], [1.5, 0.8], [320, 250],
...                         [1, 0.8], [0.3, 0.2]])
>>> df
                big     small
llama   speed   45.0    30.0
        weight  200.0   100.0
        length  1.5     1.0
cow     speed   30.0    20.0
        weight  250.0   150.0
        length  1.5     0.8
falcon  speed   320.0   250.0
        weight  1.0     0.8
        length  0.3     0.2

Drop a specific index combination from the MultiIndex DataFrame, i.e., drop the combination 'falcon' and 'weight', which deletes only the corresponding row

>>> df.drop(index=('falcon', 'weight'))
                big     small
llama   speed   45.0    30.0
        weight  200.0   100.0
        length  1.5     1.0
cow     speed   30.0    20.0
        weight  250.0   150.0
        length  1.5     0.8
falcon  speed   320.0   250.0
        length  0.3     0.2
>>> df.drop(index='cow', columns='small')
                big
llama   speed   45.0
        weight  200.0
        length  1.5
falcon  speed   320.0
        weight  1.0
        length  0.3
>>> df.drop(index='length', level=1)
                big     small
llama   speed   45.0    30.0
        weight  200.0   100.0
cow     speed   30.0    20.0
        weight  250.0   150.0
falcon  speed   320.0   250.0
        weight  1.0     0.8
def get_hkls(self) ‑> numpy.ndarray
Expand source code
def get_hkls(self) -> np.ndarray:
    """
    Return the Miller indices as an int32 array, one row per reflection.

    The indices are read from the index when it is named exactly `H`, `K`, `L`,
    and otherwise from columns with those names.

    NOTE: this overrides the reciprocalspaceship implementation so that the indices are
    returned without modifying `self`. It is acknowledged as a hack; the preferred fix is
    to push the change upstream. - @tjlane

    Raises
    ------
    ValueError
        If `H`, `K`, `L` are found neither as the index names nor as columns.
    RuntimeError
        If the last axis of the assembled array does not have length
        `NUMBER_OF_DIMENSIONS_IN_UNIVERSE`.
    """
    miller_labels = ["H", "K", "L"]
    indexed_by_hkl = self.index.names == miller_labels

    if not indexed_by_hkl and any(label not in self.columns for label in miller_labels):
        msg = f"cannot find `H`, `K`, and `L` columns in index or columns {self.columns}"
        raise ValueError(msg)

    if indexed_by_hkl:
        miller_array = self.index.to_frame().to_numpy(dtype=np.int32)
    else:
        # extract each column on its own (as a DataSeries) so that we never try to
        # build an intermediate Map object that lacks F, PHI
        per_axis = [self[label].to_numpy(dtype=np.int32) for label in miller_labels]
        miller_array = np.vstack(per_axis).T

    if miller_array.shape[-1] != NUMBER_OF_DIMENSIONS_IN_UNIVERSE:
        msg = f"something went wrong, HKL array has a funny shape: {miller_array.shape}"
        raise RuntimeError(msg)

    return miller_array

Get the Miller indices in the DataSet as a ndarray.

Returns

hkl : ndarray, shape=(n_reflections, 3)
Miller indices in DataSet
def insert(self, loc: int, column: str, value: Any, *, allow_duplicates: bool = False) ‑> None
Expand source code
def insert(self, loc: int, column: str, value: Any, *, allow_duplicates: bool = False) -> None:
    """
    Insert a column, but only if its name is one a Map is allowed to hold.

    Parameters
    ----------
    loc : int
        Insertion position; forwarded unchanged to the parent `insert`.
    column : str
        Name of the new column. Must be listed in `self._allowed_columns`.
    value : Any
        Column contents; forwarded unchanged to the parent `insert`.
    allow_duplicates : bool, optional
        Forwarded unchanged to the parent `insert`. Default False.

    Raises
    ------
    MapMutabilityError
        If `column` is not in `self._allowed_columns`.
    """
    if column not in self._allowed_columns:
        # the clauses are joined with "; " so the message reads as separate statements
        msg = (
            "general column assignment not allowed for Map objects; "
            f"special columns allowed: {self._allowed_columns}; "
            "see also Map.set_uncertainties(...)"
        )
        raise MapMutabilityError(msg)

    super().insert(loc, column, value, allow_duplicates=allow_duplicates)

Insert column into DataFrame at specified location.

Raises a ValueError if column is already contained in the DataFrame, unless allow_duplicates is set to True.

Parameters

loc : int
Insertion index. Must satisfy 0 <= loc <= len(columns).
column : str, number, or hashable object
Label of the inserted column.
value : Scalar, Series, or array-like
Content of the inserted column.
allow_duplicates : bool, optional, default False
Allow duplicate column labels to be created.

See Also

Index.insert
Insert new item by index.

Examples

>>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
>>> df
   col1  col2
0     1     3
1     2     4
>>> df.insert(1, "newcol", [99, 99])
>>> df
   col1  newcol  col2
0     1      99     3
1     2      99     4
>>> df.insert(0, "col1", [100, 100], allow_duplicates=True)
>>> df
   col1  col1  newcol  col2
0   100     1      99     3
1   100     2      99     4

Note that pandas aligns on the index when the value is a Series:

>>> df.insert(0, "col0", pd.Series([5, 6], index=[1, 2]))
>>> df
   col0  col1  col1  newcol  col2
0   NaN   100     1      99     3
1   5.0   100     2      99     4
def set_uncertainties(self, values: rs.DataSeries, column_name: str = 'SIGF') ‑> None
Expand source code
def set_uncertainties(self, values: rs.DataSeries, column_name: str = "SIGF") -> None:
    """
    Set the uncertainty values, creating the uncertainty column if there is none yet.

    Parameters
    ----------
    values : rs.DataSeries
        The uncertainties; first passed through `self._verify_uncertainty_type`.
    column_name : str, optional
        Name given to the uncertainty column when one has to be created. Not used when
        the map already has uncertainties. Default "SIGF".

    Raises
    ------
    RuntimeError
        If a new column must be created but the map does not hold exactly two columns.
        In that case the map is left unmodified.
    """
    values = self._verify_uncertainty_type(values)

    if self.has_uncertainties:
        self.uncertainties = values
        return

    # otherwise, create a new column -- but validate BEFORE touching any state, so that a
    # failure does not leave `_uncertainty_column` naming a column that was never inserted
    number_of_columns = len(self.columns)
    number_of_columns_with_just_amplitudes_and_phases = 2
    if number_of_columns != number_of_columns_with_just_amplitudes_and_phases:
        msg = "Misconfigured columns"
        raise RuntimeError(msg)

    self._uncertainty_column = column_name
    # goes through the parent class's insert, not this class's own `insert`
    super().insert(number_of_columns, self._uncertainty_column, values, allow_duplicates=False)
def to_ccp4_map(self, *, map_sampling: int) ‑> gemmi.Ccp4Map
Expand source code
def to_ccp4_map(self, *, map_sampling: int) -> gemmi.Ccp4Map:
    """
    Build a `gemmi.Ccp4Map` from this map's amplitude and phase columns.

    Parameters
    ----------
    map_sampling : int
        Passed to gemmi's `transform_f_phi_to_map` as `sample_rate`.

    Returns
    -------
    gemmi.Ccp4Map
        A map whose grid is the transform of the coefficients, with its CCP4 header updated.
    """
    real_space_grid = self.to_gemmi().transform_f_phi_to_map(
        self._amplitude_column, self._phase_column, sample_rate=map_sampling
    )

    output_map = gemmi.Ccp4Map()
    output_map.grid = real_space_grid
    # the header is refreshed only after the grid has been attached
    output_map.update_ccp4_header()
    return output_map
def to_structurefactor(self) ‑> reciprocalspaceship.dataseries.DataSeries
Expand source code
def to_structurefactor(self) -> rs.DataSeries:
    """Return the complex structure factors built from this map's amplitude and phase columns."""
    amplitude_label = self._amplitude_column
    phase_label = self._phase_column
    return super().to_structurefactor(amplitude_label, phase_label)

Return a DataSeries of complex structure factor amplitudes

def write_mtz(self, file_path: str | Path) ‑> None
Expand source code
def write_mtz(self, file_path: str | Path) -> None:
    """
    Write this map to an MTZ file.

    Parameters
    ----------
    file_path : str | Path
        Destination of the MTZ file.
    """
    # the parent writer is always handed a plain string, so Path inputs are converted here
    super().write_mtz(str(file_path))

Write DataSet to MTZ file.

If DataSet.merged == False, the reflections will be mapped to the reciprocal space ASU, and a M/ISYM column will be constructed.

If boolean flags with the label PARTIAL or CENTRIC are found in the DataSet, these will be cast to the MTZInt dtype, and included in the output MTZ file.

Parameters

mtzfile : str or file
name of an mtz file or a file object
skip_problem_mtztypes : bool
Whether to skip columns in DataSet that do not have specified MTZ datatypes
project_name : str
Project name to assign to MTZ file
crystal_name : str
Crystal name to assign to MTZ file
dataset_name : str
Dataset name to assign to MTZ file
class MapMutabilityError (*args, **kwargs)
Expand source code
class MapMutabilityError(RuntimeError):
    """Raised when a column that a Map does not allow is inserted into it."""

Unspecified run-time error.

Ancestors

  • builtins.RuntimeError
  • builtins.Exception
  • builtins.BaseException
class MissingUncertaintiesError (*args, **kwargs)
Expand source code
class MissingUncertaintiesError(AttributeError):
    """Raised when a Map is required to carry uncertainties but has none."""

Attribute not found.

Ancestors

  • builtins.AttributeError
  • builtins.Exception
  • builtins.BaseException