graylog updates, test.debyl.io, scripts for reference

This commit is contained in:
Bastian de Byl
2026-01-13 16:08:38 -05:00
parent 364047558c
commit 34b45853e2
12 changed files with 1136 additions and 3 deletions

View File

@@ -9,3 +9,5 @@
# SSL certificates are now handled automatically by Caddy
# - role: ssl # REMOVED - Caddy handles all certificate management
- role: github-actions
- role: graylog-config
tags: graylog-config

View File

@@ -0,0 +1,170 @@
---
# =============================================================================
# graylog-config role defaults: API endpoint plus declarative definitions of
# streams, pipelines, pipeline rules, and pipeline->stream connections.
# Consumed by the role's tasks (lookup_tables.yml, streams.yml, pipelines.yml).
# =============================================================================

# Graylog API Configuration
graylog_api_url: "https://logs.debyl.io/api"
# graylog_api_token: defined in vault (used as the basic-auth username with
# the literal password "token" by the role's uri tasks)

# Default index set for new streams (Default Stream index set)
# NOTE(review): environment-specific object ID — confirm it matches the target
# Graylog instance before reusing this role elsewhere.
graylog_default_index_set: "6955a9d3cc3f442e78805871"

# Stream definitions: each entry becomes one Graylog stream; each entry in
# `rules` becomes a stream rule on it (type 1 = EXACT field match; `inverted`
# negates the match).
graylog_streams:
  - title: "debyltech-api"
    description: "Lambda API events from debyltech-api service"
    rules:
      - field: "service"
        value: "debyltech-api"
        type: 1  # EXACT match
        inverted: false
  - title: "caddy-access"
    description: "Web traffic access logs from Caddy"
    rules:
      - field: "source"
        value: "caddy"
        type: 1
        inverted: false
      - field: "log_type"
        value: "access"
        type: 1
        inverted: false
  - title: "caddy-fulfillr"
    description: "Fulfillr-specific web traffic"
    rules:
      - field: "source"
        value: "caddy"
        type: 1
        inverted: false
      - field: "tag"
        value: "caddy.fulfillr"
        type: 1
        inverted: false
  - title: "ssh-security"
    description: "SSH access and security logs"
    rules:
      - field: "source"
        value: "sshd"
        type: 1
        inverted: false
  - title: "container-logs"
    description: "Container stdout/stderr from Podman"
    rules:
      - field: "source"
        value: "podman"
        type: 1
        inverted: false

# Pipeline definitions: stages run in ascending `stage` order; "EITHER" lets a
# message continue when any rule in the stage matches. Rule names here must
# match titles in graylog_pipeline_rules below.
graylog_pipelines:
  - title: "GeoIP Enrichment"
    description: "Add geographic information to access logs"
    stages:
      - stage: 0
        match: "EITHER"
        rules:
          - "geoip_caddy_access"
  - title: "Debyltech Event Classification"
    description: "Categorize debyltech-api events"
    stages:
      - stage: 0
        match: "EITHER"
        rules:
          - "classify_order_events"
          - "classify_review_events"
          - "classify_backinstock_events"
          - "classify_shipping_events"
          - "classify_product_events"
      # Stage 1 runs after stage 0, so the fallback rule only fires when no
      # classification rule has set event_category yet.
      - stage: 1
        match: "EITHER"
        rules:
          - "classify_default_events"

# Pipeline rule definitions (Graylog pipeline-rule DSL in `source`).
graylog_pipeline_rules:
  - title: "geoip_caddy_access"
    description: "GeoIP lookup for Caddy access logs"
    # NOTE(review): geo["city"].names.en assumes nested attribute access on the
    # lookup() result — verify against the Graylog pipeline DSL docs for the
    # maxmind_geoip adapter before relying on geo_city being populated.
    source: |
      rule "GeoIP for Caddy Access"
      when
        has_field("request_remote_ip")
      then
        let ip = to_string($message.request_remote_ip);
        let geo = lookup("geoip-lookup", ip);
        set_field("geo_country", geo["country"].iso_code);
        set_field("geo_city", geo["city"].names.en);
        set_field("geo_coordinates", geo["coordinates"]);
      end
  - title: "classify_order_events"
    description: "Classify order events"
    source: |
      rule "Classify order events"
      when
        has_field("event") AND contains(to_string($message.event), "order")
      then
        set_field("event_category", "order");
      end
  - title: "classify_review_events"
    description: "Classify review events"
    source: |
      rule "Classify review events"
      when
        has_field("event") AND contains(to_string($message.event), "review")
      then
        set_field("event_category", "review");
      end
  - title: "classify_backinstock_events"
    description: "Classify back-in-stock events"
    source: |
      rule "Classify back-in-stock events"
      when
        has_field("event") AND contains(to_string($message.event), "backinstock")
      then
        set_field("event_category", "backinstock");
      end
  - title: "classify_shipping_events"
    description: "Classify shipping events"
    source: |
      rule "Classify shipping events"
      when
        has_field("event") AND contains(to_string($message.event), "shipping")
      then
        set_field("event_category", "shipping");
      end
  - title: "classify_product_events"
    description: "Classify product events"
    source: |
      rule "Classify product events"
      when
        has_field("event") AND contains(to_string($message.event), "product")
      then
        set_field("event_category", "product");
      end
  - title: "classify_default_events"
    description: "Default category for unclassified events"
    source: |
      rule "Classify default events"
      when
        has_field("event") AND NOT has_field("event_category")
      then
        set_field("event_category", "other");
      end

# Pipeline to stream connections (applied by pipelines.yml). Names must match
# pipeline titles and stream titles defined above.
graylog_pipeline_connections:
  - pipeline: "GeoIP Enrichment"
    streams:
      - "caddy-access"
      - "caddy-fulfillr"
  - pipeline: "Debyltech Event Classification"
    streams:
      - "debyltech-api"

View File

@@ -0,0 +1,187 @@
---
# Graylog Lookup Table Management via REST API
# Creates Data Adapters, Caches, and Lookup Tables for GeoIP enrichment.
# Auth pattern throughout: the API token is the basic-auth username and the
# literal string "token" is the password (Graylog access-token convention).
# Idempotency key is the object `name`; existing objects are never modified.

# =============================================================================
# Data Adapters
# =============================================================================
- name: get existing data adapters
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/system/lookup/adapters"
    method: GET
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Accept: application/json
    status_code: 200
  register: existing_adapters
  tags: graylog-config, lookup-tables

- name: build list of existing adapter names
  ansible.builtin.set_fact:
    existing_adapter_names: "{{ existing_adapters.json.data_adapters | default([]) | map(attribute='name') | list }}"
  tags: graylog-config, lookup-tables

# NOTE(review): the mmdb path assumes the GeoLite2-City database is already
# present inside the Graylog container/host — confirm it is provisioned.
- name: create GeoIP data adapter
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/system/lookup/adapters"
    method: POST
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Content-Type: application/json
    body_format: json
    body:
      name: "geoip-adapter"
      title: "GeoIP MaxMind Adapter"
      description: "MaxMind GeoLite2-City database adapter"
      config:
        type: "maxmind_geoip"
        path: "/usr/share/graylog/geoip/GeoLite2-City.mmdb"
        database_type: "MAXMIND_CITY"
        check_interval: 86400
        check_interval_unit: "SECONDS"
    status_code: [200, 201]
  when: "'geoip-adapter' not in existing_adapter_names"
  register: created_adapter
  tags: graylog-config, lookup-tables

# =============================================================================
# Caches
# =============================================================================
- name: get existing caches
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/system/lookup/caches"
    method: GET
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Accept: application/json
    status_code: 200
  register: existing_caches
  tags: graylog-config, lookup-tables

- name: build list of existing cache names
  ansible.builtin.set_fact:
    existing_cache_names: "{{ existing_caches.json.caches | default([]) | map(attribute='name') | list }}"
  tags: graylog-config, lookup-tables

- name: create GeoIP cache
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/system/lookup/caches"
    method: POST
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Content-Type: application/json
    body_format: json
    body:
      name: "geoip-cache"
      title: "GeoIP Cache"
      description: "Cache for GeoIP lookups"
      config:
        type: "guava_cache"
        max_size: 10000
        expire_after_access: 3600
        expire_after_access_unit: "SECONDS"
        expire_after_write: 0
        expire_after_write_unit: "SECONDS"
    status_code: [200, 201]
  when: "'geoip-cache' not in existing_cache_names"
  register: created_cache
  tags: graylog-config, lookup-tables

# =============================================================================
# Lookup Tables
# =============================================================================
# Re-fetch adapters and caches so newly-created objects (above) appear in the
# name -> id maps used when creating the lookup table.
- name: refresh adapters list
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/system/lookup/adapters"
    method: GET
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Accept: application/json
    status_code: 200
  register: all_adapters
  tags: graylog-config, lookup-tables

- name: refresh caches list
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/system/lookup/caches"
    method: GET
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Accept: application/json
    status_code: 200
  register: all_caches
  tags: graylog-config, lookup-tables

- name: build adapter and cache ID maps
  ansible.builtin.set_fact:
    adapter_id_map: "{{ all_adapters.json.data_adapters | default([]) | items2dict(key_name='name', value_name='id') }}"
    cache_id_map: "{{ all_caches.json.caches | default([]) | items2dict(key_name='name', value_name='id') }}"
  tags: graylog-config, lookup-tables

- name: get existing lookup tables
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/system/lookup/tables"
    method: GET
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Accept: application/json
    status_code: 200
  register: existing_tables
  tags: graylog-config, lookup-tables

- name: build list of existing table names
  ansible.builtin.set_fact:
    existing_table_names: "{{ existing_tables.json.lookup_tables | default([]) | map(attribute='name') | list }}"
  tags: graylog-config, lookup-tables

# Only created when the table is missing AND both referenced objects exist,
# so a failed adapter/cache creation above skips this rather than erroring.
- name: create GeoIP lookup table
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/system/lookup/tables"
    method: POST
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Content-Type: application/json
    body_format: json
    body:
      name: "geoip-lookup"
      title: "GeoIP Lookup Table"
      description: "Lookup table for GeoIP resolution"
      cache_id: "{{ cache_id_map['geoip-cache'] }}"
      data_adapter_id: "{{ adapter_id_map['geoip-adapter'] }}"
      default_single_value: ""
      default_single_value_type: "NULL"
      default_multi_value: ""
      default_multi_value_type: "NULL"
    status_code: [200, 201]
  when:
    - "'geoip-lookup' not in existing_table_names"
    - "'geoip-adapter' in adapter_id_map"
    - "'geoip-cache' in cache_id_map"
  tags: graylog-config, lookup-tables

View File

@@ -0,0 +1,15 @@
---
# Graylog Configuration via REST API
# Entry point for the graylog-config role: configures lookup tables, streams,
# pipelines, and pipeline rules. Order matters: lookup tables must exist before
# pipeline rules reference them, and streams must exist before pipelines are
# connected to them (stream_id_map is set in streams.yml and read in
# pipelines.yml).
- name: include lookup table configuration
  ansible.builtin.include_tasks: lookup_tables.yml
  tags: graylog-config, lookup-tables
- name: include stream configuration
  ansible.builtin.include_tasks: streams.yml
  tags: graylog-config, streams
- name: include pipeline configuration
  ansible.builtin.include_tasks: pipelines.yml
  tags: graylog-config, pipelines

View File

@@ -0,0 +1,188 @@
---
# Graylog Pipeline Management via REST API
# Idempotent: checks for existing pipelines/rules (matched by title) before
# creating. Auth: API token as basic-auth username, literal "token" password.
# Expects stream_id_map to have been set by streams.yml.

# =============================================================================
# Pipeline Rules
# =============================================================================
- name: get existing pipeline rules
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/system/pipelines/rule"
    method: GET
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Accept: application/json
    status_code: 200
  register: existing_rules
  tags: graylog-config, pipelines

- name: build list of existing rule titles
  ansible.builtin.set_fact:
    existing_rule_titles: "{{ existing_rules.json | map(attribute='title') | list }}"
    existing_rule_map: "{{ existing_rules.json | items2dict(key_name='title', value_name='id') }}"
  tags: graylog-config, pipelines

# Rules are created from graylog_pipeline_rules (role defaults). Existing
# rules are skipped, never updated — edit in the UI or delete to recreate.
- name: create pipeline rules
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/system/pipelines/rule"
    method: POST
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Content-Type: application/json
    body_format: json
    body:
      title: "{{ item.title }}"
      description: "{{ item.description | default('') }}"
      source: "{{ item.source }}"
    status_code: [200, 201]
  loop: "{{ graylog_pipeline_rules }}"
  loop_control:
    label: "{{ item.title }}"
  when: item.title not in existing_rule_titles
  register: created_rules
  tags: graylog-config, pipelines

- name: refresh rule list after creation
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/system/pipelines/rule"
    method: GET
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Accept: application/json
    status_code: 200
  register: all_rules
  tags: graylog-config, pipelines

- name: build rule ID lookup
  ansible.builtin.set_fact:
    rule_id_map: "{{ all_rules.json | items2dict(key_name='title', value_name='id') }}"
  tags: graylog-config, pipelines

# =============================================================================
# Pipelines
# =============================================================================
- name: get existing pipelines
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/system/pipelines/pipeline"
    method: GET
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Accept: application/json
    status_code: 200
  register: existing_pipelines
  tags: graylog-config, pipelines

- name: build list of existing pipeline titles
  ansible.builtin.set_fact:
    existing_pipeline_titles: "{{ existing_pipelines.json | map(attribute='title') | list }}"
    existing_pipeline_map: "{{ existing_pipelines.json | items2dict(key_name='title', value_name='id') }}"
  tags: graylog-config, pipelines

# Render the pipeline DSL text for each pipeline definition; the template
# reads the loop item via the `pipeline` var (see pipeline_source.j2).
- name: build pipeline source for each pipeline
  ansible.builtin.set_fact:
    pipeline_sources: "{{ pipeline_sources | default({}) | combine({item.title: lookup('template', 'pipeline_source.j2')}) }}"
  loop: "{{ graylog_pipelines }}"
  loop_control:
    label: "{{ item.title }}"
  vars:
    pipeline: "{{ item }}"
  tags: graylog-config, pipelines

- name: create pipelines
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/system/pipelines/pipeline"
    method: POST
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Content-Type: application/json
    body_format: json
    body:
      title: "{{ item.title }}"
      description: "{{ item.description | default('') }}"
      source: "{{ pipeline_sources[item.title] }}"
    status_code: [200, 201]
  loop: "{{ graylog_pipelines }}"
  loop_control:
    label: "{{ item.title }}"
  when: item.title not in existing_pipeline_titles
  register: created_pipelines
  tags: graylog-config, pipelines

- name: refresh pipeline list after creation
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/system/pipelines/pipeline"
    method: GET
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Accept: application/json
    status_code: 200
  register: all_pipelines
  tags: graylog-config, pipelines

- name: build pipeline ID lookup
  ansible.builtin.set_fact:
    pipeline_id_map: "{{ all_pipelines.json | items2dict(key_name='title', value_name='id') }}"
  tags: graylog-config, pipelines

# =============================================================================
# Pipeline to Stream Connections
# =============================================================================
# current_connections is fetched but not consulted below; connections are
# re-posted on every run.
- name: get current pipeline connections
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/system/pipelines/connections"
    method: GET
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Accept: application/json
    status_code: 200
  register: current_connections
  tags: graylog-config, pipelines

# NOTE(review): to_stream posts a single pipeline_id per call — if it replaces
# (rather than appends to) a stream's pipeline set, a stream listed under two
# pipelines would keep only the last one. Confirm against the Graylog API.
# ignore_errors keeps a single failed connection from aborting the play, but
# also hides genuine API errors.
- name: connect pipelines to streams
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/system/pipelines/connections/to_stream"
    method: POST
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Content-Type: application/json
    body_format: json
    body:
      stream_id: "{{ stream_id_map[item.1] }}"
      pipeline_ids:
        - "{{ pipeline_id_map[item.0.pipeline] }}"
    status_code: [200, 201]
  loop: "{{ graylog_pipeline_connections | subelements('streams') }}"
  loop_control:
    label: "{{ item.0.pipeline }} -> {{ item.1 }}"
  when:
    - item.0.pipeline in pipeline_id_map
    - item.1 in stream_id_map
  ignore_errors: true
  tags: graylog-config, pipelines

View File

@@ -0,0 +1,127 @@
---
# Graylog Stream Management via REST API
# Idempotent: checks for existing streams (matched by title) before creating.
# Auth: API token as basic-auth username, literal "token" password.
# Side effect for later includes: sets stream_id_map (title -> id), which
# pipelines.yml relies on.

- name: get existing streams
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/streams"
    method: GET
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Accept: application/json
    status_code: 200
  register: existing_streams
  tags: graylog-config, streams

- name: build list of existing stream titles
  ansible.builtin.set_fact:
    existing_stream_titles: "{{ existing_streams.json.streams | map(attribute='title') | list }}"
    existing_stream_map: "{{ existing_streams.json.streams | items2dict(key_name='title', value_name='id') }}"
  tags: graylog-config, streams

# NOTE(review): templated values like "{{ item.remove_from_default | default(true) }}"
# serialize as JSON strings, not native booleans, unless jinja2 native types
# are enabled — confirm the Graylog API coerces them as expected.
- name: create streams
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/streams"
    method: POST
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Content-Type: application/json
    body_format: json
    body:
      title: "{{ item.title }}"
      description: "{{ item.description | default('') }}"
      index_set_id: "{{ item.index_set_id | default(graylog_default_index_set) }}"
      remove_matches_from_default_stream: "{{ item.remove_from_default | default(true) }}"
    status_code: [200, 201]
  loop: "{{ graylog_streams }}"
  loop_control:
    label: "{{ item.title }}"
  when: item.title not in existing_stream_titles
  register: created_streams
  tags: graylog-config, streams

- name: refresh stream list after creation
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/streams"
    method: GET
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Accept: application/json
    status_code: 200
  register: all_streams
  tags: graylog-config, streams

- name: build stream ID lookup
  ansible.builtin.set_fact:
    stream_id_map: "{{ all_streams.json.streams | items2dict(key_name='title', value_name='id') }}"
  tags: graylog-config, streams

- name: get existing rules for each stream
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/streams/{{ stream_id_map[item.title] }}/rules"
    method: GET
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Accept: application/json
    status_code: 200
  loop: "{{ graylog_streams }}"
  loop_control:
    label: "{{ item.title }}"
  when: item.title in stream_id_map
  register: stream_rules
  tags: graylog-config, streams

# The second `when` condition finds the GET result for this stream in
# stream_rules.results and only creates the rule if no existing rule has the
# same field AND value (idempotency at rule granularity).
- name: create stream rules
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/streams/{{ stream_id_map[item.0.title] }}/rules"
    method: POST
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Content-Type: application/json
    body_format: json
    body:
      field: "{{ item.1.field }}"
      value: "{{ item.1.value }}"
      type: "{{ item.1.type | default(1) }}"
      inverted: "{{ item.1.inverted | default(false) }}"
      description: "{{ item.1.description | default('') }}"
    status_code: [200, 201]
  loop: "{{ graylog_streams | subelements('rules', skip_missing=True) }}"
  loop_control:
    label: "{{ item.0.title }} - {{ item.1.field }}:{{ item.1.value }}"
  when:
    - item.0.title in stream_id_map
    - stream_rules.results | selectattr('item.title', 'equalto', item.0.title) | map(attribute='json.stream_rules') | first | default([]) | selectattr('field', 'equalto', item.1.field) | selectattr('value', 'equalto', item.1.value) | list | length == 0
  tags: graylog-config, streams

# Streams are created paused; resume every managed stream. ignore_errors
# covers streams that are already running (and hides other resume failures).
- name: start streams
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/streams/{{ stream_id_map[item.title] }}/resume"
    method: POST
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
    status_code: [200, 204]
  loop: "{{ graylog_streams }}"
  loop_control:
    label: "{{ item.title }}"
  when: item.title in stream_id_map
  ignore_errors: true
  tags: graylog-config, streams

View File

@@ -0,0 +1,8 @@
{# Renders a Graylog pipeline definition in the pipeline DSL.
   Expects a `pipeline` variable (mapping with `title` and `stages`); each
   stage needs `stage` (number) and `rules` (list of rule titles), with
   `match` defaulting to EITHER. Rendered by pipelines.yml via
   lookup('template', ...). #}
pipeline "{{ pipeline.title }}"
{% for stage in pipeline.stages %}
stage {{ stage.stage }} match {{ stage.match | default('EITHER') }}
{% for rule in stage.rules %}
rule "{{ rule }}"
{% endfor %}
{% endfor %}
end

View File

@@ -25,6 +25,7 @@
# Legacy volume mounts removed - Caddy manages certificates automatically
# Mount static site directories
- "/usr/local/share/fulfillr-site:/usr/local/share/fulfillr-site:ro"
- "/usr/local/share/test-site:/srv/test-site:ro"
env:
CADDY_ADMIN: "0.0.0.0:2019"
restart_policy: always

View File

@@ -27,6 +27,17 @@
- caddy
- ssl
# Document root for the test.debyl.io static site; the Caddy container mounts
# this path (read-only) and serves it. Owned by the deploy user so content can
# be rsynced/copied without privilege escalation.
- name: create test-site directory
  become: true
  ansible.builtin.file:
    path: /usr/local/share/test-site
    state: directory
    owner: "{{ ansible_user }}"
    group: "{{ ansible_user }}"
    mode: '0755'
  tags:
    - caddy
- name: deploy caddyfile
become: true
ansible.builtin.template:

View File

@@ -346,3 +346,26 @@
format json
}
}
# ============================================================================
# TEST/STAGING SITES
# ============================================================================
# Test Site - test.debyl.io (Public static site hosting, no caching)
# Serves the directory mounted at /srv/test-site in the Caddy container
# (host path /usr/local/share/test-site). This file is an Ansible template:
# {{ caddy_log_level }} is substituted at deploy time.
test.debyl.io {
	import common_headers
	root * /srv/test-site
	# SPA-style fallback: unresolved paths serve /index.html
	try_files {path} {path}/ /index.html
	file_server
	# Disable all caching for test sites
	header Cache-Control "no-store, no-cache, must-revalidate, max-age=0"
	header Pragma "no-cache"
	log {
		output file /var/log/caddy/test.log
		format json
		level {{ caddy_log_level }}
	}
}

Binary file not shown.

401
scripts/steam-workshop-query.py Executable file
View File

@@ -0,0 +1,401 @@
#!/usr/bin/env python3
"""
Steam Workshop Query Tool for Project Zomboid Mods
Queries Steam API to get mod details including correct Mod IDs with special characters.
Useful for generating properly formatted mod lists for Build 42 servers.
Usage:
# Query individual workshop items (semicolon-separated)
python steam-workshop-query.py "ID1;ID2;ID3"
# Query from a Steam Workshop collection
python steam-workshop-query.py --collection 3625776190
python steam-workshop-query.py --collection "https://steamcommunity.com/sharedfiles/filedetails?id=3625776190"
# Output formats
--json Output raw JSON data
--ansible Output workshop_items and mod_ids strings for ansible config
--report Human-readable report (default)
Examples:
python steam-workshop-query.py "3171167894;3330403100" --ansible
python steam-workshop-query.py --collection 3625776190 --report
"""
import requests
import json
import sys
import time
import re
import argparse
from typing import List, Dict, Optional, Tuple
from datetime import datetime
# Steam Web API endpoints (anonymous POST requests; no API key required).
STEAM_API_DETAILS = "https://api.steampowered.com/ISteamRemoteStorage/GetPublishedFileDetails/v1/"
STEAM_API_COLLECTION = "https://api.steampowered.com/ISteamRemoteStorage/GetCollectionDetails/v1/"
BATCH_SIZE = 50  # Conservative batch size to avoid rate limits
DELAY_BETWEEN_BATCHES = 1.0  # seconds
def get_collection_items(collection_id: str) -> List[str]:
    """Return the workshop item IDs contained in a Steam Workshop collection.

    Posts to the GetCollectionDetails endpoint; warnings for missing or
    errored collections go to stderr and yield an empty/partial list.
    """
    payload = {"collectioncount": 1, "publishedfileids[0]": collection_id}
    response = requests.post(STEAM_API_COLLECTION, data=payload)
    response.raise_for_status()

    details = response.json().get("response", {}).get("collectiondetails", [])
    if not details:
        print(f"Warning: No collection found with ID {collection_id}", file=sys.stderr)
        return []

    item_ids: List[str] = []
    for coll in details:
        # Steam reports per-collection status; result == 1 means OK.
        if coll.get("result") != 1:
            print(f"Warning: Collection {collection_id} returned error result", file=sys.stderr)
            continue
        item_ids.extend(
            child["publishedfileid"]
            for child in coll.get("children", [])
            if child.get("publishedfileid")
        )
    return item_ids
def query_workshop_items_batch(item_ids: List[str]) -> List[Dict]:
    """Query the Steam API for one batch of workshop item details."""
    # The endpoint expects indexed form fields: publishedfileids[0], [1], ...
    payload: Dict = {"itemcount": len(item_ids)}
    payload.update({f"publishedfileids[{i}]": item_id for i, item_id in enumerate(item_ids)})

    response = requests.post(STEAM_API_DETAILS, data=payload)
    response.raise_for_status()
    return response.json().get("response", {}).get("publishedfiledetails", [])
def query_all_workshop_items(item_ids: List[str]) -> List[Dict]:
    """Query Steam API for all workshop items, handling batching.

    Splits item_ids into BATCH_SIZE chunks, queries each via
    query_workshop_items_batch, and sleeps DELAY_BETWEEN_BATCHES seconds
    between chunks. Progress is reported on stderr.
    """
    all_items = []
    for i in range(0, len(item_ids), BATCH_SIZE):
        batch = item_ids[i:i + BATCH_SIZE]
        print(f"Querying batch {i // BATCH_SIZE + 1} ({len(batch)} items)...", file=sys.stderr)
        items = query_workshop_items_batch(batch)
        all_items.extend(items)
        # Delay between batches to avoid rate limiting (skipped after the last batch)
        if i + BATCH_SIZE < len(item_ids):
            time.sleep(DELAY_BETWEEN_BATCHES)
    return all_items
def extract_mod_id(item: Dict) -> Optional[str]:
    """
    Extract Mod ID(s) from a workshop item's description.

    PZ mods typically include 'Mod ID: xxx' in their description. Some mods
    list multiple Mod IDs on separate lines, or comma/semicolon separated on
    one line; lines mixing both separators are also handled.

    Returns a semicolon-joined string of unique Mod IDs in first-seen order,
    or None if the description contains no 'Mod ID:' tag.
    """
    description = item.get("description", "")
    # Find ALL "Mod ID: xxx" patterns in the description (one per line).
    matches = re.findall(r'Mod ID:\s*([^\r\n]+)', description, re.IGNORECASE)
    if not matches:
        return None
    all_mod_ids = []
    for match in matches:
        # Trailing periods are prose punctuation, not part of the ID.
        mod_id_str = match.strip().rstrip('.')
        # Split on comma AND semicolon in one pass so lines that mix both
        # separators (e.g. "a, b; c") are fully split; the previous
        # implementation only split on whichever separator it saw first.
        all_mod_ids.extend(m.strip() for m in re.split(r'[;,]', mod_id_str))
    # Remove empty strings and duplicates while preserving order.
    seen = set()
    unique_ids = []
    for mod_id in all_mod_ids:
        if mod_id and mod_id not in seen:
            seen.add(mod_id)
            unique_ids.append(mod_id)
    return ';'.join(unique_ids) if unique_ids else None
def check_b42_compatible(item: Dict) -> Tuple[bool, str]:
    """
    Heuristically check whether a mod looks Build-42 compatible.

    Scans the lowercased title and workshop tags for B42 markers, then for
    explicit "B41 only" wording. Returns (is_compatible, reason).
    """
    title = item.get("title", "").lower()
    tag_text = " ".join(t.get("tag", "").lower() for t in item.get("tags", []))

    # Markers that indicate B42 support in the title or tags.
    b42_markers = (
        r'\bb42\b',
        r'build\s*42',
        r'\b42\.\d+',
        r'\[b42\]',
        r'\(b42\)',
    )
    if any(re.search(marker, title) or re.search(marker, tag_text) for marker in b42_markers):
        return True, "B42 mentioned in title/tags"

    # Explicit "B41 only" wording suggests the mod is NOT compatible.
    if re.search(r'\bb41\b.*only', title) or re.search(r'build\s*41\s*only', title):
        return False, "B41 only"

    return False, "No B42 indicator found"
def has_special_characters(text: str) -> bool:
    """Check if text contains special characters that need attention.

    The flagged set matches the original: ' " ! & ( )
    """
    return any(symbol in text for symbol in "'\"!&()")
def extract_collection_id(url_or_id: str) -> str:
    """Pull the numeric collection ID out of a workshop URL; pass bare IDs through."""
    found = re.search(r'[?&]id=(\d+)', url_or_id)
    if found:
        return found.group(1)
    return url_or_id
def format_timestamp(unix_ts: int) -> str:
    """Render a Unix timestamp as YYYY-MM-DD; 'Unknown' for falsy input.

    NOTE(review): uses datetime.fromtimestamp, i.e. the host's local
    timezone, so the printed date can differ by machine — confirm intended.
    """
    if unix_ts:
        return datetime.fromtimestamp(unix_ts).strftime("%Y-%m-%d")
    return "Unknown"
def process_items(items: List[Dict]) -> Dict:
    """
    Process workshop items and extract relevant information.

    Returns a dict with keys:
      items       - per-item entries (workshop_id, title, mod_id, B42 status,
                    last_updated, has_special_chars, result_code)
      duplicates  - mod_id -> [workshop_ids] for mod_ids seen more than once
      issues      - human-readable problem strings (API errors, missing IDs,
                    special characters)
      total_count - number of raw items received
      valid_count - number of items that yielded a Mod ID
    """
    processed = []
    duplicates = {}
    issues = []
    for item in items:
        workshop_id = item.get("publishedfileid", "unknown")
        title = item.get("title", "Unknown")
        mod_id = extract_mod_id(item)
        b42_compat, b42_reason = check_b42_compatible(item)
        last_updated = item.get("time_updated", 0)
        result_code = item.get("result", 0)
        entry = {
            "workshop_id": workshop_id,
            "title": title,
            "mod_id": mod_id,
            "b42_compatible": b42_compat,
            "b42_reason": b42_reason,
            "last_updated": format_timestamp(last_updated),
            "has_special_chars": has_special_characters(mod_id or ""),
            "result_code": result_code,
        }
        # Track duplicates by mod_id. The full (possibly semicolon-joined)
        # string is the key, so two mods sharing only one of several IDs are
        # NOT flagged here.
        if mod_id:
            if mod_id in duplicates:
                duplicates[mod_id].append(workshop_id)
            else:
                duplicates[mod_id] = [workshop_id]
        # Track issues (Steam API result 1 == OK)
        if result_code != 1:
            issues.append(f"Workshop item {workshop_id} returned error (result={result_code})")
        if not mod_id:
            issues.append(f"Workshop item {workshop_id} ({title}) has no Mod ID tag")
        if entry["has_special_chars"]:
            issues.append(f"Mod ID '{mod_id}' contains special characters")
        processed.append(entry)
    # Find actual duplicates (mod_id appearing more than once)
    duplicate_mod_ids = {k: v for k, v in duplicates.items() if len(v) > 1}
    return {
        "items": processed,
        "duplicates": duplicate_mod_ids,
        "issues": issues,
        "total_count": len(items),
        "valid_count": len([i for i in processed if i["mod_id"]]),
    }
def output_report(data: Dict) -> None:
    """Print a human-readable analysis report to stdout.

    Sections: summary counts, duplicate Mod IDs (if any), issues (if any),
    and the full per-item mod list. `data` is the dict from process_items.
    """
    print("\n" + "=" * 80)
    print("STEAM WORKSHOP MOD ANALYSIS REPORT")
    print("=" * 80)
    print(f"\nTotal items: {data['total_count']}")
    print(f"Valid items (with Mod ID): {data['valid_count']}")
    if data["duplicates"]:
        print(f"\n{'=' * 40}")
        print("DUPLICATE MOD IDs:")
        print(f"{'=' * 40}")
        for mod_id, workshop_ids in data["duplicates"].items():
            print(f" {mod_id}: {', '.join(workshop_ids)}")
    if data["issues"]:
        print(f"\n{'=' * 40}")
        print("ISSUES:")
        print(f"{'=' * 40}")
        for issue in data["issues"]:
            print(f" - {issue}")
    print(f"\n{'=' * 40}")
    print("MOD LIST:")
    print(f"{'=' * 40}")
    for item in data["items"]:
        # [???] means no positive B42 indicator was found, not proven-incompatible
        b42_status = "[B42]" if item["b42_compatible"] else "[???]"
        special = " [SPECIAL CHARS]" if item["has_special_chars"] else ""
        mod_id_display = item["mod_id"] or "<NO MOD ID>"
        print(f"\n Workshop: {item['workshop_id']}")
        print(f" Title: {item['title']}")
        print(f" Mod ID: {mod_id_display}{special}")
        print(f" Status: {b42_status} {item['b42_reason']}")
        print(f" Updated: {item['last_updated']}")
def output_ansible(data: Dict) -> None:
    """Print ansible-ready workshop_items / mod_ids strings for zomboid_mods.

    Deduplicates workshop IDs and Mod IDs (first occurrence wins), prefixes
    each Mod ID with a backslash (Build 42 format), and emits a YAML snippet
    followed by commented warnings for duplicates and issues.
    """
    # Get unique, valid mod IDs (preserving order, removing duplicates)
    seen_workshop = set()
    seen_mod_ids = set()
    workshop_items = []
    mod_ids = []
    for item in data["items"]:
        workshop_id = item["workshop_id"]
        mod_id_str = item["mod_id"]
        # Skip if we've seen this workshop item
        if workshop_id in seen_workshop:
            continue
        seen_workshop.add(workshop_id)
        workshop_items.append(workshop_id)
        # Handle mod_id which may contain multiple IDs separated by semicolon
        if mod_id_str:
            for mod_id in mod_id_str.split(';'):
                mod_id = mod_id.strip()
                if mod_id and mod_id not in seen_mod_ids:
                    seen_mod_ids.add(mod_id)
                    mod_ids.append(mod_id)
    # Format for Build 42 (backslash prefix)
    workshop_str = ";".join(workshop_items)
    mod_ids_str = ";".join(f"\\{mid}" for mid in mod_ids)
    print("\n# Ansible Configuration for zomboid_mods")
    print("# Copy these values to ansible/roles/podman/defaults/main.yml")
    print("")
    # NOTE(review): YAML requires consistent child indentation — verify the
    # leading spaces below produce valid YAML when pasted (block scalars must
    # be indented deeper than their key).
    print("zomboid_mods:")
    print(" workshop_items: >-")
    print(f" {workshop_str}")
    print(" mod_ids: >-")
    print(f" {mod_ids_str}")
    if data["duplicates"]:
        print("\n# WARNING: The following Mod IDs had duplicates (kept first occurrence):")
        for mod_id, workshop_ids in data["duplicates"].items():
            print(f"# {mod_id}: {', '.join(workshop_ids)}")
    if data["issues"]:
        print("\n# Issues found:")
        for issue in data["issues"]:
            print(f"# - {issue}")
def output_json(data: Dict) -> None:
    """Dump the processed analysis dict as pretty-printed JSON to stdout."""
    print(json.dumps(data, indent=2))
def main():
    """CLI entry point: parse arguments, fetch mod data, emit the chosen format.

    Input is either --collection (ID or URL) or a positional semicolon-
    separated list of workshop IDs; with neither, help is printed and the
    process exits 1. Output format: --json, --ansible, or the default report
    (the --report flag is accepted but is effectively the fallback).
    """
    parser = argparse.ArgumentParser(
        description="Query Steam Workshop for Project Zomboid mod details",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__
    )
    parser.add_argument(
        "workshop_ids",
        nargs="?",
        help="Semicolon-separated workshop IDs (e.g., 'ID1;ID2;ID3')"
    )
    parser.add_argument(
        "--collection", "-c",
        help="Steam Workshop collection ID or URL"
    )
    parser.add_argument(
        "--json", "-j",
        action="store_true",
        help="Output raw JSON data"
    )
    parser.add_argument(
        "--ansible", "-a",
        action="store_true",
        help="Output ansible-ready configuration"
    )
    parser.add_argument(
        "--report", "-r",
        action="store_true",
        help="Output human-readable report (default)"
    )
    args = parser.parse_args()
    # Determine input source (--collection takes precedence over positional IDs)
    if args.collection:
        collection_id = extract_collection_id(args.collection)
        print(f"Fetching collection {collection_id}...", file=sys.stderr)
        item_ids = get_collection_items(collection_id)
        if not item_ids:
            print("Error: No items found in collection", file=sys.stderr)
            sys.exit(1)
        print(f"Found {len(item_ids)} items in collection", file=sys.stderr)
    elif args.workshop_ids:
        item_ids = [id.strip() for id in args.workshop_ids.split(";") if id.strip()]
    else:
        parser.print_help()
        sys.exit(1)
    # Query Steam API (progress goes to stderr so stdout stays machine-readable)
    print(f"Querying {len(item_ids)} workshop items...", file=sys.stderr)
    items = query_all_workshop_items(item_ids)
    print(f"Retrieved {len(items)} item details", file=sys.stderr)
    # Process items
    data = process_items(items)
    # Output based on format
    if args.json:
        output_json(data)
    elif args.ansible:
        output_ansible(data)
    else:
        output_report(data)


if __name__ == "__main__":
    main()