Example Usage
Example 1: Compute a Few Metrics
To compute specific metrics, specify the dimensions you want:
from iot_dqa.utils.enums import Dimension
dqs = DataQualityScore(
file_path="path/to/your/data.csv",
col_mapping={
"date": "date_column_name",
"value": "value_column_name",
},
dimensions=[Dimension.VALIDITY.value, Dimension.TIMELINESS.value],
)
metrics = dqs.compute_metrics()
print(metrics)
Example 2: Adjust Configuration
You can adjust the configuration for metrics computation:
from iot_dqa.utils.configs import MetricsConfig, AccuracyConfig
custom_config = MetricsConfig(
accuracy=AccuracyConfig(ensemble=True, algorithms=["z_score", "iqr"])
)
dqs = DataQualityScore(
file_path="path/to/your/data.csv",
col_mapping={
"date": "date_column_name",
"value": "value_column_name",
},
metrics_config=custom_config,
)
metrics = dqs.compute_metrics()
print(metrics)
Example 3: Compute Score
To compute the overall data quality score:
from iot_dqa.utils.enums import WeightingMechanism, OutputFormat
dqs = DataQualityScore(
file_path="path/to/your/data.csv",
col_mapping={
"date": "date_column_name",
"value": "value_column_name",
"id": "device_id_column_name",
},
)
scores = dqs.compute_score(
weighting_mechanism=WeightingMechanism.EQUAL.value,
output_format=OutputFormat.CSV.value,
output_path="./output",
)
print(scores)
Example 4: Export Score to File/GeoJSON/CSV
To export the computed scores to a file:
dqs = DataQualityScore(
file_path="path/to/your/data.csv",
col_mapping={
"date": "date_column_name",
"value": "value_column_name",
"id": "device_id_column_name",
},
)
scores = dqs.compute_score(
output_format="geojson", # Options: "csv", "geojson"
output_path="./output",
export=True,
)
print("Scores exported successfully.")
Example 5: AHP Weighting Example
To compute the data quality score using AHP (Analytic Hierarchy Process) weighting:
from iot_dqa.utils.enums import WeightingMechanism
dqs = DataQualityScore(
file_path="path/to/your/data.csv",
col_mapping={
"date": "date_column_name",
"value": "value_column_name",
"id": "device_id_column_name",
},
)
ahp_weights = {
"validity": 0.4,
"accuracy": 0.3,
"completeness": 0.2,
"timeliness": 0.1,
}
scores = dqs.compute_score(
weighting_mechanism=WeightingMechanism.AHP.value,
ahp_weights=ahp_weights,
output_format="csv",
output_path="./output",
)
print(scores)
Example 6: Isolation Forest for Outlier Detection
from iot_dqa.utils.configs import MetricsConfig, AccuracyConfig
from sklearn.ensemble import IsolationForest
# Define custom metrics configuration with Isolation Forest
custom_metrics_config = MetricsConfig(
accuracy=AccuracyConfig(
ensemble=False,
algorithms=["if"],
isolation_forest={"n_estimators": 100, "max_samples": "auto", "random_state": 42}
)
)
# Initialize DataQualityScore with Isolation Forest configuration
dqs = DataQualityScore(
file_path="data/sample.csv",
col_mapping={"date": "timestamp", "value": "sensor_value", "id": "device_id"},
metrics_config=custom_metrics_config,
dimensions=["accuracy"]
)
# Compute metrics
metrics = dqs.compute_metrics()
print(metrics)
Example 7: Timeliness with Custom Inter-Arrival Time Method
from iot_dqa.utils.configs import MetricsConfig, TimelinessConfig
from iot_dqa.utils.enums import FrequencyCalculationMethod
# Define custom metrics configuration for timeliness
custom_metrics_config = MetricsConfig(
timeliness=TimelinessConfig(
iat_method=FrequencyCalculationMethod.MODE.value
)
)
# Initialize DataQualityScore with custom timeliness configuration
dqs = DataQualityScore(
file_path="data/sample.csv",
col_mapping={"date": "timestamp", "value": "sensor_value", "id": "device_id"},
metrics_config=custom_metrics_config,
dimensions=["timeliness"],
multiple_devices=True
)
# Compute metrics
metrics = dqs.compute_metrics()
print(metrics)
Example 8: Inter-Quartile Range (IQR) with Optuna Optimization
from iot_dqa.utils.configs import MetricsConfig, AccuracyConfig
# Define custom metrics configuration with IQR and Optuna optimization
custom_metrics_config = MetricsConfig(
accuracy=AccuracyConfig(
ensemble=False,
algorithms=["iqr"],
optimize_iqr_with_optuna=True,
iqr_optuna_trials=50,
iqr_optuna_q1_min=0.1,
iqr_optuna_q1_max=0.3,
iqr_optuna_q3_min=0.7,
iqr_optuna_q3_max=0.9
)
)
# Initialize DataQualityScore with IQR configuration
dqs = DataQualityScore(
file_path="data/sample.csv",
col_mapping={"date": "timestamp", "value": "sensor_value", "id": "device_id"},
metrics_config=custom_metrics_config,
dimensions=["accuracy"]
)
# Compute metrics
metrics = dqs.compute_metrics()
print(metrics)