diff --git a/ansible/deploy_home.yml b/ansible/deploy_home.yml index 76185cb..a40b77e 100644 --- a/ansible/deploy_home.yml +++ b/ansible/deploy_home.yml @@ -9,3 +9,5 @@ # SSL certificates are now handled automatically by Caddy # - role: ssl # REMOVED - Caddy handles all certificate management - role: github-actions + - role: graylog-config + tags: graylog-config diff --git a/ansible/roles/graylog-config/defaults/main.yml b/ansible/roles/graylog-config/defaults/main.yml new file mode 100644 index 0000000..2895e1a --- /dev/null +++ b/ansible/roles/graylog-config/defaults/main.yml @@ -0,0 +1,170 @@ +--- +# Graylog API Configuration +graylog_api_url: "https://logs.debyl.io/api" +# graylog_api_token: defined in vault + +# Default index set for new streams (Default Stream index set) +graylog_default_index_set: "6955a9d3cc3f442e78805871" + +# Stream definitions +graylog_streams: + - title: "debyltech-api" + description: "Lambda API events from debyltech-api service" + rules: + - field: "service" + value: "debyltech-api" + type: 1 # EXACT match + inverted: false + + - title: "caddy-access" + description: "Web traffic access logs from Caddy" + rules: + - field: "source" + value: "caddy" + type: 1 + inverted: false + - field: "log_type" + value: "access" + type: 1 + inverted: false + + - title: "caddy-fulfillr" + description: "Fulfillr-specific web traffic" + rules: + - field: "source" + value: "caddy" + type: 1 + inverted: false + - field: "tag" + value: "caddy.fulfillr" + type: 1 + inverted: false + + - title: "ssh-security" + description: "SSH access and security logs" + rules: + - field: "source" + value: "sshd" + type: 1 + inverted: false + + - title: "container-logs" + description: "Container stdout/stderr from Podman" + rules: + - field: "source" + value: "podman" + type: 1 + inverted: false + +# Pipeline definitions +graylog_pipelines: + - title: "GeoIP Enrichment" + description: "Add geographic information to access logs" + stages: + - stage: 0 + match: "EITHER" + 
rules: + - "geoip_caddy_access" + + - title: "Debyltech Event Classification" + description: "Categorize debyltech-api events" + stages: + - stage: 0 + match: "EITHER" + rules: + - "classify_order_events" + - "classify_review_events" + - "classify_backinstock_events" + - "classify_shipping_events" + - "classify_product_events" + - stage: 1 + match: "EITHER" + rules: + - "classify_default_events" + +# Pipeline rule definitions +graylog_pipeline_rules: + - title: "geoip_caddy_access" + description: "GeoIP lookup for Caddy access logs" + source: | + rule "GeoIP for Caddy Access" + when + has_field("request_remote_ip") + then + let ip = to_string($message.request_remote_ip); + let geo = lookup("geoip-lookup", ip); + set_field("geo_country", geo["country"].iso_code); + set_field("geo_city", geo["city"].names.en); + set_field("geo_coordinates", geo["coordinates"]); + end + + - title: "classify_order_events" + description: "Classify order events" + source: | + rule "Classify order events" + when + has_field("event") AND contains(to_string($message.event), "order") + then + set_field("event_category", "order"); + end + + - title: "classify_review_events" + description: "Classify review events" + source: | + rule "Classify review events" + when + has_field("event") AND contains(to_string($message.event), "review") + then + set_field("event_category", "review"); + end + + - title: "classify_backinstock_events" + description: "Classify back-in-stock events" + source: | + rule "Classify back-in-stock events" + when + has_field("event") AND contains(to_string($message.event), "backinstock") + then + set_field("event_category", "backinstock"); + end + + - title: "classify_shipping_events" + description: "Classify shipping events" + source: | + rule "Classify shipping events" + when + has_field("event") AND contains(to_string($message.event), "shipping") + then + set_field("event_category", "shipping"); + end + + - title: "classify_product_events" + description: "Classify product 
events" + source: | + rule "Classify product events" + when + has_field("event") AND contains(to_string($message.event), "product") + then + set_field("event_category", "product"); + end + + - title: "classify_default_events" + description: "Default category for unclassified events" + source: | + rule "Classify default events" + when + has_field("event") AND NOT has_field("event_category") + then + set_field("event_category", "other"); + end + +# Pipeline to stream connections +graylog_pipeline_connections: + - pipeline: "GeoIP Enrichment" + streams: + - "caddy-access" + - "caddy-fulfillr" + + - pipeline: "Debyltech Event Classification" + streams: + - "debyltech-api" diff --git a/ansible/roles/graylog-config/tasks/lookup_tables.yml b/ansible/roles/graylog-config/tasks/lookup_tables.yml new file mode 100644 index 0000000..af8430e --- /dev/null +++ b/ansible/roles/graylog-config/tasks/lookup_tables.yml @@ -0,0 +1,187 @@ +--- +# Graylog Lookup Table Management via REST API +# Creates Data Adapters, Caches, and Lookup Tables for GeoIP + +# ============================================================================= +# Data Adapters +# ============================================================================= + +- name: get existing data adapters + ansible.builtin.uri: + url: "{{ graylog_api_url }}/system/lookup/adapters" + method: GET + user: "{{ graylog_api_token }}" + password: token + force_basic_auth: true + headers: + X-Requested-By: ansible + Accept: application/json + status_code: 200 + register: existing_adapters + tags: graylog-config, lookup-tables + +- name: build list of existing adapter names + ansible.builtin.set_fact: + existing_adapter_names: "{{ existing_adapters.json.data_adapters | default([]) | map(attribute='name') | list }}" + tags: graylog-config, lookup-tables + +- name: create GeoIP data adapter + ansible.builtin.uri: + url: "{{ graylog_api_url }}/system/lookup/adapters" + method: POST + user: "{{ graylog_api_token }}" + password: token + 
force_basic_auth: true + headers: + X-Requested-By: ansible + Content-Type: application/json + body_format: json + body: + name: "geoip-adapter" + title: "GeoIP MaxMind Adapter" + description: "MaxMind GeoLite2-City database adapter" + config: + type: "maxmind_geoip" + path: "/usr/share/graylog/geoip/GeoLite2-City.mmdb" + database_type: "MAXMIND_CITY" + check_interval: 86400 + check_interval_unit: "SECONDS" + status_code: [200, 201] + when: "'geoip-adapter' not in existing_adapter_names" + register: created_adapter + tags: graylog-config, lookup-tables + +# ============================================================================= +# Caches +# ============================================================================= + +- name: get existing caches + ansible.builtin.uri: + url: "{{ graylog_api_url }}/system/lookup/caches" + method: GET + user: "{{ graylog_api_token }}" + password: token + force_basic_auth: true + headers: + X-Requested-By: ansible + Accept: application/json + status_code: 200 + register: existing_caches + tags: graylog-config, lookup-tables + +- name: build list of existing cache names + ansible.builtin.set_fact: + existing_cache_names: "{{ existing_caches.json.caches | default([]) | map(attribute='name') | list }}" + tags: graylog-config, lookup-tables + +- name: create GeoIP cache + ansible.builtin.uri: + url: "{{ graylog_api_url }}/system/lookup/caches" + method: POST + user: "{{ graylog_api_token }}" + password: token + force_basic_auth: true + headers: + X-Requested-By: ansible + Content-Type: application/json + body_format: json + body: + name: "geoip-cache" + title: "GeoIP Cache" + description: "Cache for GeoIP lookups" + config: + type: "guava_cache" + max_size: 10000 + expire_after_access: 3600 + expire_after_access_unit: "SECONDS" + expire_after_write: 0 + expire_after_write_unit: "SECONDS" + status_code: [200, 201] + when: "'geoip-cache' not in existing_cache_names" + register: created_cache + tags: graylog-config, lookup-tables + 
+# ============================================================================= +# Lookup Tables +# ============================================================================= + +- name: refresh adapters list + ansible.builtin.uri: + url: "{{ graylog_api_url }}/system/lookup/adapters" + method: GET + user: "{{ graylog_api_token }}" + password: token + force_basic_auth: true + headers: + X-Requested-By: ansible + Accept: application/json + status_code: 200 + register: all_adapters + tags: graylog-config, lookup-tables + +- name: refresh caches list + ansible.builtin.uri: + url: "{{ graylog_api_url }}/system/lookup/caches" + method: GET + user: "{{ graylog_api_token }}" + password: token + force_basic_auth: true + headers: + X-Requested-By: ansible + Accept: application/json + status_code: 200 + register: all_caches + tags: graylog-config, lookup-tables + +- name: build adapter and cache ID maps + ansible.builtin.set_fact: + adapter_id_map: "{{ all_adapters.json.data_adapters | default([]) | items2dict(key_name='name', value_name='id') }}" + cache_id_map: "{{ all_caches.json.caches | default([]) | items2dict(key_name='name', value_name='id') }}" + tags: graylog-config, lookup-tables + +- name: get existing lookup tables + ansible.builtin.uri: + url: "{{ graylog_api_url }}/system/lookup/tables" + method: GET + user: "{{ graylog_api_token }}" + password: token + force_basic_auth: true + headers: + X-Requested-By: ansible + Accept: application/json + status_code: 200 + register: existing_tables + tags: graylog-config, lookup-tables + +- name: build list of existing table names + ansible.builtin.set_fact: + existing_table_names: "{{ existing_tables.json.lookup_tables | default([]) | map(attribute='name') | list }}" + tags: graylog-config, lookup-tables + +- name: create GeoIP lookup table + ansible.builtin.uri: + url: "{{ graylog_api_url }}/system/lookup/tables" + method: POST + user: "{{ graylog_api_token }}" + password: token + force_basic_auth: true + headers: + 
X-Requested-By: ansible + Content-Type: application/json + body_format: json + body: + name: "geoip-lookup" + title: "GeoIP Lookup Table" + description: "Lookup table for GeoIP resolution" + cache_id: "{{ cache_id_map['geoip-cache'] }}" + data_adapter_id: "{{ adapter_id_map['geoip-adapter'] }}" + default_single_value: "" + default_single_value_type: "NULL" + default_multi_value: "" + default_multi_value_type: "NULL" + status_code: [200, 201] + when: + - "'geoip-lookup' not in existing_table_names" + - "'geoip-adapter' in adapter_id_map" + - "'geoip-cache' in cache_id_map" + tags: graylog-config, lookup-tables diff --git a/ansible/roles/graylog-config/tasks/main.yml b/ansible/roles/graylog-config/tasks/main.yml new file mode 100644 index 0000000..319a622 --- /dev/null +++ b/ansible/roles/graylog-config/tasks/main.yml @@ -0,0 +1,15 @@ +--- +# Graylog Configuration via REST API +# Configures lookup tables, streams, pipelines, and pipeline rules + +- name: include lookup table configuration + ansible.builtin.include_tasks: lookup_tables.yml + tags: graylog-config, lookup-tables + +- name: include stream configuration + ansible.builtin.include_tasks: streams.yml + tags: graylog-config, streams + +- name: include pipeline configuration + ansible.builtin.include_tasks: pipelines.yml + tags: graylog-config, pipelines diff --git a/ansible/roles/graylog-config/tasks/pipelines.yml b/ansible/roles/graylog-config/tasks/pipelines.yml new file mode 100644 index 0000000..a76ecd9 --- /dev/null +++ b/ansible/roles/graylog-config/tasks/pipelines.yml @@ -0,0 +1,188 @@ +--- +# Graylog Pipeline Management via REST API +# Idempotent: checks for existing pipelines/rules before creating + +# ============================================================================= +# Pipeline Rules +# ============================================================================= + +- name: get existing pipeline rules + ansible.builtin.uri: + url: "{{ graylog_api_url }}/system/pipelines/rule" + method: 
GET + user: "{{ graylog_api_token }}" + password: token + force_basic_auth: true + headers: + X-Requested-By: ansible + Accept: application/json + status_code: 200 + register: existing_rules + tags: graylog-config, pipelines + +- name: build list of existing rule titles + ansible.builtin.set_fact: + existing_rule_titles: "{{ existing_rules.json | map(attribute='title') | list }}" + existing_rule_map: "{{ existing_rules.json | items2dict(key_name='title', value_name='id') }}" + tags: graylog-config, pipelines + +- name: create pipeline rules + ansible.builtin.uri: + url: "{{ graylog_api_url }}/system/pipelines/rule" + method: POST + user: "{{ graylog_api_token }}" + password: token + force_basic_auth: true + headers: + X-Requested-By: ansible + Content-Type: application/json + body_format: json + body: + title: "{{ item.title }}" + description: "{{ item.description | default('') }}" + source: "{{ item.source }}" + status_code: [200, 201] + loop: "{{ graylog_pipeline_rules }}" + loop_control: + label: "{{ item.title }}" + when: item.title not in existing_rule_titles + register: created_rules + tags: graylog-config, pipelines + +- name: refresh rule list after creation + ansible.builtin.uri: + url: "{{ graylog_api_url }}/system/pipelines/rule" + method: GET + user: "{{ graylog_api_token }}" + password: token + force_basic_auth: true + headers: + X-Requested-By: ansible + Accept: application/json + status_code: 200 + register: all_rules + tags: graylog-config, pipelines + +- name: build rule ID lookup + ansible.builtin.set_fact: + rule_id_map: "{{ all_rules.json | items2dict(key_name='title', value_name='id') }}" + tags: graylog-config, pipelines + +# ============================================================================= +# Pipelines +# ============================================================================= + +- name: get existing pipelines + ansible.builtin.uri: + url: "{{ graylog_api_url }}/system/pipelines/pipeline" + method: GET + user: "{{ 
graylog_api_token }}" + password: token + force_basic_auth: true + headers: + X-Requested-By: ansible + Accept: application/json + status_code: 200 + register: existing_pipelines + tags: graylog-config, pipelines + +- name: build list of existing pipeline titles + ansible.builtin.set_fact: + existing_pipeline_titles: "{{ existing_pipelines.json | map(attribute='title') | list }}" + existing_pipeline_map: "{{ existing_pipelines.json | items2dict(key_name='title', value_name='id') }}" + tags: graylog-config, pipelines + +- name: build pipeline source for each pipeline + ansible.builtin.set_fact: + pipeline_sources: "{{ pipeline_sources | default({}) | combine({item.title: lookup('template', 'pipeline_source.j2')}) }}" + loop: "{{ graylog_pipelines }}" + loop_control: + label: "{{ item.title }}" + vars: + pipeline: "{{ item }}" + tags: graylog-config, pipelines + +- name: create pipelines + ansible.builtin.uri: + url: "{{ graylog_api_url }}/system/pipelines/pipeline" + method: POST + user: "{{ graylog_api_token }}" + password: token + force_basic_auth: true + headers: + X-Requested-By: ansible + Content-Type: application/json + body_format: json + body: + title: "{{ item.title }}" + description: "{{ item.description | default('') }}" + source: "{{ pipeline_sources[item.title] }}" + status_code: [200, 201] + loop: "{{ graylog_pipelines }}" + loop_control: + label: "{{ item.title }}" + when: item.title not in existing_pipeline_titles + register: created_pipelines + tags: graylog-config, pipelines + +- name: refresh pipeline list after creation + ansible.builtin.uri: + url: "{{ graylog_api_url }}/system/pipelines/pipeline" + method: GET + user: "{{ graylog_api_token }}" + password: token + force_basic_auth: true + headers: + X-Requested-By: ansible + Accept: application/json + status_code: 200 + register: all_pipelines + tags: graylog-config, pipelines + +- name: build pipeline ID lookup + ansible.builtin.set_fact: + pipeline_id_map: "{{ all_pipelines.json | 
items2dict(key_name='title', value_name='id') }}" + tags: graylog-config, pipelines + +# ============================================================================= +# Pipeline to Stream Connections +# ============================================================================= + +- name: get current pipeline connections + ansible.builtin.uri: + url: "{{ graylog_api_url }}/system/pipelines/connections" + method: GET + user: "{{ graylog_api_token }}" + password: token + force_basic_auth: true + headers: + X-Requested-By: ansible + Accept: application/json + status_code: 200 + register: current_connections + tags: graylog-config, pipelines + +- name: connect pipelines to streams + ansible.builtin.uri: + url: "{{ graylog_api_url }}/system/pipelines/connections/to_stream" + method: POST + user: "{{ graylog_api_token }}" + password: token + force_basic_auth: true + headers: + X-Requested-By: ansible + Content-Type: application/json + body_format: json + body: + stream_id: "{{ stream_id_map[item.1] }}" + pipeline_ids: + - "{{ pipeline_id_map[item.0.pipeline] }}" + status_code: [200, 201] + loop: "{{ graylog_pipeline_connections | subelements('streams') }}" + loop_control: + label: "{{ item.0.pipeline }} -> {{ item.1 }}" + when: + - item.0.pipeline in pipeline_id_map + - item.1 in stream_id_map + ignore_errors: true + tags: graylog-config, pipelines diff --git a/ansible/roles/graylog-config/tasks/streams.yml b/ansible/roles/graylog-config/tasks/streams.yml new file mode 100644 index 0000000..4ac974e --- /dev/null +++ b/ansible/roles/graylog-config/tasks/streams.yml @@ -0,0 +1,127 @@ +--- +# Graylog Stream Management via REST API +# Idempotent: checks for existing streams before creating + +- name: get existing streams + ansible.builtin.uri: + url: "{{ graylog_api_url }}/streams" + method: GET + user: "{{ graylog_api_token }}" + password: token + force_basic_auth: true + headers: + X-Requested-By: ansible + Accept: application/json + status_code: 200 + register: 
existing_streams + tags: graylog-config, streams + +- name: build list of existing stream titles + ansible.builtin.set_fact: + existing_stream_titles: "{{ existing_streams.json.streams | map(attribute='title') | list }}" + existing_stream_map: "{{ existing_streams.json.streams | items2dict(key_name='title', value_name='id') }}" + tags: graylog-config, streams + +- name: create streams + ansible.builtin.uri: + url: "{{ graylog_api_url }}/streams" + method: POST + user: "{{ graylog_api_token }}" + password: token + force_basic_auth: true + headers: + X-Requested-By: ansible + Content-Type: application/json + body_format: json + body: + title: "{{ item.title }}" + description: "{{ item.description | default('') }}" + index_set_id: "{{ item.index_set_id | default(graylog_default_index_set) }}" + remove_matches_from_default_stream: "{{ item.remove_from_default | default(true) }}" + status_code: [200, 201] + loop: "{{ graylog_streams }}" + loop_control: + label: "{{ item.title }}" + when: item.title not in existing_stream_titles + register: created_streams + tags: graylog-config, streams + +- name: refresh stream list after creation + ansible.builtin.uri: + url: "{{ graylog_api_url }}/streams" + method: GET + user: "{{ graylog_api_token }}" + password: token + force_basic_auth: true + headers: + X-Requested-By: ansible + Accept: application/json + status_code: 200 + register: all_streams + tags: graylog-config, streams + +- name: build stream ID lookup + ansible.builtin.set_fact: + stream_id_map: "{{ all_streams.json.streams | items2dict(key_name='title', value_name='id') }}" + tags: graylog-config, streams + +- name: get existing rules for each stream + ansible.builtin.uri: + url: "{{ graylog_api_url }}/streams/{{ stream_id_map[item.title] }}/rules" + method: GET + user: "{{ graylog_api_token }}" + password: token + force_basic_auth: true + headers: + X-Requested-By: ansible + Accept: application/json + status_code: 200 + loop: "{{ graylog_streams }}" + loop_control: + 
label: "{{ item.title }}" + when: item.title in stream_id_map + register: stream_rules + tags: graylog-config, streams + +- name: create stream rules + ansible.builtin.uri: + url: "{{ graylog_api_url }}/streams/{{ stream_id_map[item.0.title] }}/rules" + method: POST + user: "{{ graylog_api_token }}" + password: token + force_basic_auth: true + headers: + X-Requested-By: ansible + Content-Type: application/json + body_format: json + body: + field: "{{ item.1.field }}" + value: "{{ item.1.value }}" + type: "{{ item.1.type | default(1) }}" + inverted: "{{ item.1.inverted | default(false) }}" + description: "{{ item.1.description | default('') }}" + status_code: [200, 201] + loop: "{{ graylog_streams | subelements('rules', skip_missing=True) }}" + loop_control: + label: "{{ item.0.title }} - {{ item.1.field }}:{{ item.1.value }}" + when: + - item.0.title in stream_id_map + - stream_rules.results | selectattr('item.title', 'equalto', item.0.title) | map(attribute='json.stream_rules') | first | default([]) | selectattr('field', 'equalto', item.1.field) | selectattr('value', 'equalto', item.1.value) | list | length == 0 + tags: graylog-config, streams + +- name: start streams + ansible.builtin.uri: + url: "{{ graylog_api_url }}/streams/{{ stream_id_map[item.title] }}/resume" + method: POST + user: "{{ graylog_api_token }}" + password: token + force_basic_auth: true + headers: + X-Requested-By: ansible + status_code: [200, 204] + loop: "{{ graylog_streams }}" + loop_control: + label: "{{ item.title }}" + when: item.title in stream_id_map + ignore_errors: true + tags: graylog-config, streams diff --git a/ansible/roles/graylog-config/templates/pipeline_source.j2 b/ansible/roles/graylog-config/templates/pipeline_source.j2 new file mode 100644 index 0000000..bc68b2a --- /dev/null +++ b/ansible/roles/graylog-config/templates/pipeline_source.j2 @@ -0,0 +1,8 @@ +pipeline "{{ pipeline.title }}" +{% for stage in pipeline.stages %} +stage {{ stage.stage }} match {{ stage.match | 
default('EITHER') | lower }} +{% for rule in stage.rules %} +rule "{{ rule }}"; +{% endfor %} +{% endfor %} +end \ No newline at end of file diff --git a/ansible/roles/podman/tasks/containers/base/caddy.yml b/ansible/roles/podman/tasks/containers/base/caddy.yml index c4668a4..2b7981f 100644 --- a/ansible/roles/podman/tasks/containers/base/caddy.yml +++ b/ansible/roles/podman/tasks/containers/base/caddy.yml @@ -25,6 +25,7 @@ # Legacy volume mounts removed - Caddy manages certificates automatically # Mount static site directories - "/usr/local/share/fulfillr-site:/usr/local/share/fulfillr-site:ro" + - "/usr/local/share/test-site:/srv/test-site:ro" env: CADDY_ADMIN: "0.0.0.0:2019" restart_policy: always diff --git a/ansible/roles/podman/tasks/containers/base/conf-caddy.yml b/ansible/roles/podman/tasks/containers/base/conf-caddy.yml index f8932fb..a603459 100644 --- a/ansible/roles/podman/tasks/containers/base/conf-caddy.yml +++ b/ansible/roles/podman/tasks/containers/base/conf-caddy.yml @@ -27,6 +27,17 @@ - caddy - ssl +- name: create test-site directory + become: true + ansible.builtin.file: + path: /usr/local/share/test-site + state: directory + owner: "{{ ansible_user }}" + group: "{{ ansible_user }}" + mode: '0755' + tags: + - caddy + - name: deploy caddyfile become: true ansible.builtin.template: diff --git a/ansible/roles/podman/templates/caddy/Caddyfile.j2 b/ansible/roles/podman/templates/caddy/Caddyfile.j2 index d096741..b71d83a 100644 --- a/ansible/roles/podman/templates/caddy/Caddyfile.j2 +++ b/ansible/roles/podman/templates/caddy/Caddyfile.j2 @@ -327,22 +327,45 @@ reverse_proxy localhost:9054 } - + # Serve static files with SPA fallback handle { root * /usr/local/share/fulfillr-site try_files {path} {path}/ /index.html file_server } - + header { Strict-Transport-Security "max-age=31536000; includeSubDomains" X-Content-Type-Options "nosniff" Referrer-Policy "same-origin" } - + log { output file /var/log/caddy/fulfillr.log format json } +} + +# 
============================================================================ +# TEST/STAGING SITES +# ============================================================================ + +# Test Site - test.debyl.io (Public static site hosting, no caching) +test.debyl.io { + import common_headers + + root * /srv/test-site + try_files {path} {path}/ /index.html + file_server + + # Disable all caching for test sites + header Cache-Control "no-store, no-cache, must-revalidate, max-age=0" + header Pragma "no-cache" + + log { + output file /var/log/caddy/test.log + format json + level {{ caddy_log_level }} + } } \ No newline at end of file diff --git a/ansible/vars/vault.yml b/ansible/vars/vault.yml index 6133083..d99bc71 100644 Binary files a/ansible/vars/vault.yml and b/ansible/vars/vault.yml differ diff --git a/scripts/steam-workshop-query.py b/scripts/steam-workshop-query.py new file mode 100755 index 0000000..a044468 --- /dev/null +++ b/scripts/steam-workshop-query.py @@ -0,0 +1,401 @@ +#!/usr/bin/env python3 +""" +Steam Workshop Query Tool for Project Zomboid Mods + +Queries Steam API to get mod details including correct Mod IDs with special characters. +Useful for generating properly formatted mod lists for Build 42 servers. 
+ +Usage: + # Query individual workshop items (semicolon-separated) + python steam-workshop-query.py "ID1;ID2;ID3" + + # Query from a Steam Workshop collection + python steam-workshop-query.py --collection 3625776190 + python steam-workshop-query.py --collection "https://steamcommunity.com/sharedfiles/filedetails?id=3625776190" + + # Output formats + --json Output raw JSON data + --ansible Output workshop_items and mod_ids strings for ansible config + --report Human-readable report (default) + +Examples: + python steam-workshop-query.py "3171167894;3330403100" --ansible + python steam-workshop-query.py --collection 3625776190 --report +""" + +import requests +import json +import sys +import time +import re +import argparse +from typing import List, Dict, Optional, Tuple +from datetime import datetime + +STEAM_API_DETAILS = "https://api.steampowered.com/ISteamRemoteStorage/GetPublishedFileDetails/v1/" +STEAM_API_COLLECTION = "https://api.steampowered.com/ISteamRemoteStorage/GetCollectionDetails/v1/" +BATCH_SIZE = 50 # Conservative batch size to avoid rate limits +DELAY_BETWEEN_BATCHES = 1.0 # seconds + + +def get_collection_items(collection_id: str) -> List[str]: + """Fetch all workshop item IDs from a Steam Workshop collection.""" + data = {"collectioncount": 1, "publishedfileids[0]": collection_id} + response = requests.post(STEAM_API_COLLECTION, data=data) + response.raise_for_status() + result = response.json() + + items = [] + collection_details = result.get("response", {}).get("collectiondetails", []) + + if not collection_details: + print(f"Warning: No collection found with ID {collection_id}", file=sys.stderr) + return items + + for coll in collection_details: + if coll.get("result") != 1: + print(f"Warning: Collection {collection_id} returned error result", file=sys.stderr) + continue + for child in coll.get("children", []): + file_id = child.get("publishedfileid") + if file_id: + items.append(file_id) + + return items + + +def 
query_workshop_items_batch(item_ids: List[str]) -> List[Dict]: + """Query Steam API for a batch of workshop item details.""" + data = {"itemcount": len(item_ids)} + for i, item_id in enumerate(item_ids): + data[f"publishedfileids[{i}]"] = item_id + + response = requests.post(STEAM_API_DETAILS, data=data) + response.raise_for_status() + result = response.json() + + return result.get("response", {}).get("publishedfiledetails", []) + + +def query_all_workshop_items(item_ids: List[str]) -> List[Dict]: + """Query Steam API for all workshop items, handling batching.""" + all_items = [] + + for i in range(0, len(item_ids), BATCH_SIZE): + batch = item_ids[i:i + BATCH_SIZE] + print(f"Querying batch {i // BATCH_SIZE + 1} ({len(batch)} items)...", file=sys.stderr) + + items = query_workshop_items_batch(batch) + all_items.extend(items) + + # Delay between batches to avoid rate limiting + if i + BATCH_SIZE < len(item_ids): + time.sleep(DELAY_BETWEEN_BATCHES) + + return all_items + + +def extract_mod_id(item: Dict) -> Optional[str]: + """ + Extract Mod ID(s) from item description. + PZ mods typically include 'Mod ID: xxx' in their description. + Some mods have multiple Mod IDs on separate lines or comma-separated. 
+ """ + description = item.get("description", "") + + # Find ALL "Mod ID: xxx" patterns in description (multiple lines) + matches = re.findall(r'Mod ID:\s*([^\r\n]+)', description, re.IGNORECASE) + + if not matches: + return None + + all_mod_ids = [] + for match in matches: + mod_id_str = match.strip().rstrip('.') + # Handle comma or semicolon separated mod IDs on same line + if ',' in mod_id_str: + all_mod_ids.extend([m.strip() for m in mod_id_str.split(',')]) + elif ';' in mod_id_str: + all_mod_ids.extend([m.strip() for m in mod_id_str.split(';')]) + else: + all_mod_ids.append(mod_id_str) + + # Remove empty strings and duplicates while preserving order + seen = set() + unique_ids = [] + for mod_id in all_mod_ids: + if mod_id and mod_id not in seen: + seen.add(mod_id) + unique_ids.append(mod_id) + + return ';'.join(unique_ids) if unique_ids else None + + +def check_b42_compatible(item: Dict) -> Tuple[bool, str]: + """ + Check if mod appears to be B42 compatible. + Returns (is_compatible, reason). 
+ """ + title = item.get("title", "").lower() + tags = [t.get("tag", "").lower() for t in item.get("tags", [])] + all_tags_str = " ".join(tags) + + # B42 indicators in title or tags + b42_patterns = [ + r'\bb42\b', + r'build\s*42', + r'\b42\.\d+', + r'\[b42\]', + r'\(b42\)', + ] + + for pattern in b42_patterns: + if re.search(pattern, title) or re.search(pattern, all_tags_str): + return True, "B42 mentioned in title/tags" + + # Check for B41 only indicators (might not be compatible) + b41_only = re.search(r'\bb41\b.*only', title) or re.search(r'build\s*41\s*only', title) + if b41_only: + return False, "B41 only" + + return False, "No B42 indicator found" + + +def has_special_characters(text: str) -> bool: + """Check if text contains special characters that need attention.""" + special = ["'", '"', "!", "&", "(", ")"] + return any(c in text for c in special) + + +def extract_collection_id(url_or_id: str) -> str: + """Extract collection ID from URL or return as-is if already an ID.""" + match = re.search(r'[?&]id=(\d+)', url_or_id) + return match.group(1) if match else url_or_id + + +def format_timestamp(unix_ts: int) -> str: + """Format Unix timestamp as readable date.""" + if not unix_ts: + return "Unknown" + return datetime.fromtimestamp(unix_ts).strftime("%Y-%m-%d") + + +def process_items(items: List[Dict]) -> Dict: + """ + Process workshop items and extract relevant information. + Returns a dict with processed data and analysis. 
+ """ + processed = [] + duplicates = {} + issues = [] + + for item in items: + workshop_id = item.get("publishedfileid", "unknown") + title = item.get("title", "Unknown") + mod_id = extract_mod_id(item) + b42_compat, b42_reason = check_b42_compatible(item) + last_updated = item.get("time_updated", 0) + result_code = item.get("result", 0) + + entry = { + "workshop_id": workshop_id, + "title": title, + "mod_id": mod_id, + "b42_compatible": b42_compat, + "b42_reason": b42_reason, + "last_updated": format_timestamp(last_updated), + "has_special_chars": has_special_characters(mod_id or ""), + "result_code": result_code, + } + + # Track duplicates by mod_id + if mod_id: + if mod_id in duplicates: + duplicates[mod_id].append(workshop_id) + else: + duplicates[mod_id] = [workshop_id] + + # Track issues + if result_code != 1: + issues.append(f"Workshop item {workshop_id} returned error (result={result_code})") + if not mod_id: + issues.append(f"Workshop item {workshop_id} ({title}) has no Mod ID tag") + if entry["has_special_chars"]: + issues.append(f"Mod ID '{mod_id}' contains special characters") + + processed.append(entry) + + # Find actual duplicates (mod_id appearing more than once) + duplicate_mod_ids = {k: v for k, v in duplicates.items() if len(v) > 1} + + return { + "items": processed, + "duplicates": duplicate_mod_ids, + "issues": issues, + "total_count": len(items), + "valid_count": len([i for i in processed if i["mod_id"]]), + } + + +def output_report(data: Dict) -> None: + """Output human-readable report.""" + print("\n" + "=" * 80) + print("STEAM WORKSHOP MOD ANALYSIS REPORT") + print("=" * 80) + + print(f"\nTotal items: {data['total_count']}") + print(f"Valid items (with Mod ID): {data['valid_count']}") + + if data["duplicates"]: + print(f"\n{'=' * 40}") + print("DUPLICATE MOD IDs:") + print(f"{'=' * 40}") + for mod_id, workshop_ids in data["duplicates"].items(): + print(f" {mod_id}: {', '.join(workshop_ids)}") + + if data["issues"]: + print(f"\n{'=' * 40}") 
+ print("ISSUES:") + print(f"{'=' * 40}") + for issue in data["issues"]: + print(f" - {issue}") + + print(f"\n{'=' * 40}") + print("MOD LIST:") + print(f"{'=' * 40}") + + for item in data["items"]: + b42_status = "[B42]" if item["b42_compatible"] else "[???]" + special = " [SPECIAL CHARS]" if item["has_special_chars"] else "" + mod_id_display = item["mod_id"] or "" + + print(f"\n Workshop: {item['workshop_id']}") + print(f" Title: {item['title']}") + print(f" Mod ID: {mod_id_display}{special}") + print(f" Status: {b42_status} {item['b42_reason']}") + print(f" Updated: {item['last_updated']}") + + +def output_ansible(data: Dict) -> None: + """Output ansible-ready configuration strings.""" + # Get unique, valid mod IDs (preserving order, removing duplicates) + seen_workshop = set() + seen_mod_ids = set() + workshop_items = [] + mod_ids = [] + + for item in data["items"]: + workshop_id = item["workshop_id"] + mod_id_str = item["mod_id"] + + # Skip if we've seen this workshop item + if workshop_id in seen_workshop: + continue + seen_workshop.add(workshop_id) + workshop_items.append(workshop_id) + + # Handle mod_id which may contain multiple IDs separated by semicolon + if mod_id_str: + for mod_id in mod_id_str.split(';'): + mod_id = mod_id.strip() + if mod_id and mod_id not in seen_mod_ids: + seen_mod_ids.add(mod_id) + mod_ids.append(mod_id) + + # Format for Build 42 (backslash prefix) + workshop_str = ";".join(workshop_items) + mod_ids_str = ";".join(f"\\{mid}" for mid in mod_ids) + + print("\n# Ansible Configuration for zomboid_mods") + print("# Copy these values to ansible/roles/podman/defaults/main.yml") + print("") + print("zomboid_mods:") + print(" workshop_items: >-") + print(f" {workshop_str}") + print(" mod_ids: >-") + print(f" {mod_ids_str}") + + if data["duplicates"]: + print("\n# WARNING: The following Mod IDs had duplicates (kept first occurrence):") + for mod_id, workshop_ids in data["duplicates"].items(): + print(f"# {mod_id}: {', '.join(workshop_ids)}") 
+ + if data["issues"]: + print("\n# Issues found:") + for issue in data["issues"]: + print(f"# - {issue}") + + +def output_json(data: Dict) -> None: + """Output JSON data.""" + print(json.dumps(data, indent=2)) + + +def main(): + parser = argparse.ArgumentParser( + description="Query Steam Workshop for Project Zomboid mod details", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__ + ) + + parser.add_argument( + "workshop_ids", + nargs="?", + help="Semicolon-separated workshop IDs (e.g., 'ID1;ID2;ID3')" + ) + parser.add_argument( + "--collection", "-c", + help="Steam Workshop collection ID or URL" + ) + parser.add_argument( + "--json", "-j", + action="store_true", + help="Output raw JSON data" + ) + parser.add_argument( + "--ansible", "-a", + action="store_true", + help="Output ansible-ready configuration" + ) + parser.add_argument( + "--report", "-r", + action="store_true", + help="Output human-readable report (default)" + ) + + args = parser.parse_args() + + # Determine input source + if args.collection: + collection_id = extract_collection_id(args.collection) + print(f"Fetching collection {collection_id}...", file=sys.stderr) + item_ids = get_collection_items(collection_id) + if not item_ids: + print("Error: No items found in collection", file=sys.stderr) + sys.exit(1) + print(f"Found {len(item_ids)} items in collection", file=sys.stderr) + elif args.workshop_ids: + item_ids = [id.strip() for id in args.workshop_ids.split(";") if id.strip()] + else: + parser.print_help() + sys.exit(1) + + # Query Steam API + print(f"Querying {len(item_ids)} workshop items...", file=sys.stderr) + items = query_all_workshop_items(item_ids) + print(f"Retrieved {len(items)} item details", file=sys.stderr) + + # Process items + data = process_items(items) + + # Output based on format + if args.json: + output_json(data) + elif args.ansible: + output_ansible(data) + else: + output_report(data) + + +if __name__ == "__main__": + main()