graylog updates, test.debyl.io, scripts for reference

This commit is contained in:
Bastian de Byl
2026-01-13 16:08:38 -05:00
parent 364047558c
commit 34b45853e2
12 changed files with 1136 additions and 3 deletions

View File

@@ -9,3 +9,5 @@
# SSL certificates are now handled automatically by Caddy
# - role: ssl # REMOVED - Caddy handles all certificate management
- role: github-actions
- role: graylog-config
tags: graylog-config

View File

@@ -0,0 +1,170 @@
---
# =============================================================================
# graylog-config role defaults: API endpoint plus declarative definitions of
# streams, pipelines, pipeline rules, and pipeline->stream connections.
# Consumed by the role's tasks (lookup_tables.yml, streams.yml, pipelines.yml).
# =============================================================================

# Graylog API Configuration
graylog_api_url: "https://logs.debyl.io/api"
# graylog_api_token: defined in vault (used as the basic-auth username with
# the literal password "token" by the role's uri tasks)

# Default index set for new streams (Default Stream index set)
# NOTE(review): environment-specific object ID — confirm it matches the target
# Graylog instance before reusing this role elsewhere.
graylog_default_index_set: "6955a9d3cc3f442e78805871"

# Stream definitions: each entry becomes one Graylog stream; each entry in
# `rules` becomes a stream rule on it (type 1 = EXACT field match; `inverted`
# negates the match).
graylog_streams:
  - title: "debyltech-api"
    description: "Lambda API events from debyltech-api service"
    rules:
      - field: "service"
        value: "debyltech-api"
        type: 1  # EXACT match
        inverted: false
  - title: "caddy-access"
    description: "Web traffic access logs from Caddy"
    rules:
      - field: "source"
        value: "caddy"
        type: 1
        inverted: false
      - field: "log_type"
        value: "access"
        type: 1
        inverted: false
  - title: "caddy-fulfillr"
    description: "Fulfillr-specific web traffic"
    rules:
      - field: "source"
        value: "caddy"
        type: 1
        inverted: false
      - field: "tag"
        value: "caddy.fulfillr"
        type: 1
        inverted: false
  - title: "ssh-security"
    description: "SSH access and security logs"
    rules:
      - field: "source"
        value: "sshd"
        type: 1
        inverted: false
  - title: "container-logs"
    description: "Container stdout/stderr from Podman"
    rules:
      - field: "source"
        value: "podman"
        type: 1
        inverted: false

# Pipeline definitions: stages run in ascending `stage` order; "EITHER" lets a
# message continue when any rule in the stage matches. Rule names here must
# match titles in graylog_pipeline_rules below.
graylog_pipelines:
  - title: "GeoIP Enrichment"
    description: "Add geographic information to access logs"
    stages:
      - stage: 0
        match: "EITHER"
        rules:
          - "geoip_caddy_access"
  - title: "Debyltech Event Classification"
    description: "Categorize debyltech-api events"
    stages:
      - stage: 0
        match: "EITHER"
        rules:
          - "classify_order_events"
          - "classify_review_events"
          - "classify_backinstock_events"
          - "classify_shipping_events"
          - "classify_product_events"
      # Stage 1 runs after stage 0, so the fallback rule only fires when no
      # classification rule has set event_category yet.
      - stage: 1
        match: "EITHER"
        rules:
          - "classify_default_events"

# Pipeline rule definitions (Graylog pipeline-rule DSL in `source`).
graylog_pipeline_rules:
  - title: "geoip_caddy_access"
    description: "GeoIP lookup for Caddy access logs"
    # NOTE(review): geo["city"].names.en assumes nested attribute access on the
    # lookup() result — verify against the Graylog pipeline DSL docs for the
    # maxmind_geoip adapter before relying on geo_city being populated.
    source: |
      rule "GeoIP for Caddy Access"
      when
        has_field("request_remote_ip")
      then
        let ip = to_string($message.request_remote_ip);
        let geo = lookup("geoip-lookup", ip);
        set_field("geo_country", geo["country"].iso_code);
        set_field("geo_city", geo["city"].names.en);
        set_field("geo_coordinates", geo["coordinates"]);
      end
  - title: "classify_order_events"
    description: "Classify order events"
    source: |
      rule "Classify order events"
      when
        has_field("event") AND contains(to_string($message.event), "order")
      then
        set_field("event_category", "order");
      end
  - title: "classify_review_events"
    description: "Classify review events"
    source: |
      rule "Classify review events"
      when
        has_field("event") AND contains(to_string($message.event), "review")
      then
        set_field("event_category", "review");
      end
  - title: "classify_backinstock_events"
    description: "Classify back-in-stock events"
    source: |
      rule "Classify back-in-stock events"
      when
        has_field("event") AND contains(to_string($message.event), "backinstock")
      then
        set_field("event_category", "backinstock");
      end
  - title: "classify_shipping_events"
    description: "Classify shipping events"
    source: |
      rule "Classify shipping events"
      when
        has_field("event") AND contains(to_string($message.event), "shipping")
      then
        set_field("event_category", "shipping");
      end
  - title: "classify_product_events"
    description: "Classify product events"
    source: |
      rule "Classify product events"
      when
        has_field("event") AND contains(to_string($message.event), "product")
      then
        set_field("event_category", "product");
      end
  - title: "classify_default_events"
    description: "Default category for unclassified events"
    source: |
      rule "Classify default events"
      when
        has_field("event") AND NOT has_field("event_category")
      then
        set_field("event_category", "other");
      end

# Pipeline to stream connections (applied by pipelines.yml). Names must match
# pipeline titles and stream titles defined above.
graylog_pipeline_connections:
  - pipeline: "GeoIP Enrichment"
    streams:
      - "caddy-access"
      - "caddy-fulfillr"
  - pipeline: "Debyltech Event Classification"
    streams:
      - "debyltech-api"

View File

@@ -0,0 +1,187 @@
---
# Graylog Lookup Table Management via REST API
# Creates Data Adapters, Caches, and Lookup Tables for GeoIP enrichment.
# Auth pattern throughout: the API token is the basic-auth username and the
# literal string "token" is the password (Graylog access-token convention).
# Idempotency key is the object `name`; existing objects are never modified.

# =============================================================================
# Data Adapters
# =============================================================================
- name: get existing data adapters
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/system/lookup/adapters"
    method: GET
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Accept: application/json
    status_code: 200
  register: existing_adapters
  tags: graylog-config, lookup-tables

- name: build list of existing adapter names
  ansible.builtin.set_fact:
    existing_adapter_names: "{{ existing_adapters.json.data_adapters | default([]) | map(attribute='name') | list }}"
  tags: graylog-config, lookup-tables

# NOTE(review): the mmdb path assumes the GeoLite2-City database is already
# present inside the Graylog container/host — confirm it is provisioned.
- name: create GeoIP data adapter
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/system/lookup/adapters"
    method: POST
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Content-Type: application/json
    body_format: json
    body:
      name: "geoip-adapter"
      title: "GeoIP MaxMind Adapter"
      description: "MaxMind GeoLite2-City database adapter"
      config:
        type: "maxmind_geoip"
        path: "/usr/share/graylog/geoip/GeoLite2-City.mmdb"
        database_type: "MAXMIND_CITY"
        check_interval: 86400
        check_interval_unit: "SECONDS"
    status_code: [200, 201]
  when: "'geoip-adapter' not in existing_adapter_names"
  register: created_adapter
  tags: graylog-config, lookup-tables

# =============================================================================
# Caches
# =============================================================================
- name: get existing caches
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/system/lookup/caches"
    method: GET
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Accept: application/json
    status_code: 200
  register: existing_caches
  tags: graylog-config, lookup-tables

- name: build list of existing cache names
  ansible.builtin.set_fact:
    existing_cache_names: "{{ existing_caches.json.caches | default([]) | map(attribute='name') | list }}"
  tags: graylog-config, lookup-tables

- name: create GeoIP cache
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/system/lookup/caches"
    method: POST
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Content-Type: application/json
    body_format: json
    body:
      name: "geoip-cache"
      title: "GeoIP Cache"
      description: "Cache for GeoIP lookups"
      config:
        type: "guava_cache"
        max_size: 10000
        expire_after_access: 3600
        expire_after_access_unit: "SECONDS"
        expire_after_write: 0
        expire_after_write_unit: "SECONDS"
    status_code: [200, 201]
  when: "'geoip-cache' not in existing_cache_names"
  register: created_cache
  tags: graylog-config, lookup-tables

# =============================================================================
# Lookup Tables
# =============================================================================
# Re-fetch adapters and caches so newly-created objects (above) appear in the
# name -> id maps used when creating the lookup table.
- name: refresh adapters list
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/system/lookup/adapters"
    method: GET
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Accept: application/json
    status_code: 200
  register: all_adapters
  tags: graylog-config, lookup-tables

- name: refresh caches list
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/system/lookup/caches"
    method: GET
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Accept: application/json
    status_code: 200
  register: all_caches
  tags: graylog-config, lookup-tables

- name: build adapter and cache ID maps
  ansible.builtin.set_fact:
    adapter_id_map: "{{ all_adapters.json.data_adapters | default([]) | items2dict(key_name='name', value_name='id') }}"
    cache_id_map: "{{ all_caches.json.caches | default([]) | items2dict(key_name='name', value_name='id') }}"
  tags: graylog-config, lookup-tables

- name: get existing lookup tables
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/system/lookup/tables"
    method: GET
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Accept: application/json
    status_code: 200
  register: existing_tables
  tags: graylog-config, lookup-tables

- name: build list of existing table names
  ansible.builtin.set_fact:
    existing_table_names: "{{ existing_tables.json.lookup_tables | default([]) | map(attribute='name') | list }}"
  tags: graylog-config, lookup-tables

# Only created when the table is missing AND both referenced objects exist,
# so a failed adapter/cache creation above skips this rather than erroring.
- name: create GeoIP lookup table
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/system/lookup/tables"
    method: POST
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Content-Type: application/json
    body_format: json
    body:
      name: "geoip-lookup"
      title: "GeoIP Lookup Table"
      description: "Lookup table for GeoIP resolution"
      cache_id: "{{ cache_id_map['geoip-cache'] }}"
      data_adapter_id: "{{ adapter_id_map['geoip-adapter'] }}"
      default_single_value: ""
      default_single_value_type: "NULL"
      default_multi_value: ""
      default_multi_value_type: "NULL"
    status_code: [200, 201]
  when:
    - "'geoip-lookup' not in existing_table_names"
    - "'geoip-adapter' in adapter_id_map"
    - "'geoip-cache' in cache_id_map"
  tags: graylog-config, lookup-tables

View File

@@ -0,0 +1,15 @@
---
# Graylog Configuration via REST API
# Entry point for the graylog-config role: configures lookup tables, streams,
# pipelines, and pipeline rules. Order matters: lookup tables must exist before
# pipeline rules reference them, and streams must exist before pipelines are
# connected to them (stream_id_map is set in streams.yml and read in
# pipelines.yml).
- name: include lookup table configuration
  ansible.builtin.include_tasks: lookup_tables.yml
  tags: graylog-config, lookup-tables
- name: include stream configuration
  ansible.builtin.include_tasks: streams.yml
  tags: graylog-config, streams
- name: include pipeline configuration
  ansible.builtin.include_tasks: pipelines.yml
  tags: graylog-config, pipelines

View File

@@ -0,0 +1,188 @@
---
# Graylog Pipeline Management via REST API
# Idempotent: checks for existing pipelines/rules (matched by title) before
# creating. Auth: API token as basic-auth username, literal "token" password.
# Expects stream_id_map to have been set by streams.yml.

# =============================================================================
# Pipeline Rules
# =============================================================================
- name: get existing pipeline rules
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/system/pipelines/rule"
    method: GET
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Accept: application/json
    status_code: 200
  register: existing_rules
  tags: graylog-config, pipelines

- name: build list of existing rule titles
  ansible.builtin.set_fact:
    existing_rule_titles: "{{ existing_rules.json | map(attribute='title') | list }}"
    existing_rule_map: "{{ existing_rules.json | items2dict(key_name='title', value_name='id') }}"
  tags: graylog-config, pipelines

# Rules are created from graylog_pipeline_rules (role defaults). Existing
# rules are skipped, never updated — edit in the UI or delete to recreate.
- name: create pipeline rules
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/system/pipelines/rule"
    method: POST
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Content-Type: application/json
    body_format: json
    body:
      title: "{{ item.title }}"
      description: "{{ item.description | default('') }}"
      source: "{{ item.source }}"
    status_code: [200, 201]
  loop: "{{ graylog_pipeline_rules }}"
  loop_control:
    label: "{{ item.title }}"
  when: item.title not in existing_rule_titles
  register: created_rules
  tags: graylog-config, pipelines

- name: refresh rule list after creation
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/system/pipelines/rule"
    method: GET
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Accept: application/json
    status_code: 200
  register: all_rules
  tags: graylog-config, pipelines

- name: build rule ID lookup
  ansible.builtin.set_fact:
    rule_id_map: "{{ all_rules.json | items2dict(key_name='title', value_name='id') }}"
  tags: graylog-config, pipelines

# =============================================================================
# Pipelines
# =============================================================================
- name: get existing pipelines
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/system/pipelines/pipeline"
    method: GET
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Accept: application/json
    status_code: 200
  register: existing_pipelines
  tags: graylog-config, pipelines

- name: build list of existing pipeline titles
  ansible.builtin.set_fact:
    existing_pipeline_titles: "{{ existing_pipelines.json | map(attribute='title') | list }}"
    existing_pipeline_map: "{{ existing_pipelines.json | items2dict(key_name='title', value_name='id') }}"
  tags: graylog-config, pipelines

# Render the pipeline DSL text for each pipeline definition; the template
# reads the loop item via the `pipeline` var (see pipeline_source.j2).
- name: build pipeline source for each pipeline
  ansible.builtin.set_fact:
    pipeline_sources: "{{ pipeline_sources | default({}) | combine({item.title: lookup('template', 'pipeline_source.j2')}) }}"
  loop: "{{ graylog_pipelines }}"
  loop_control:
    label: "{{ item.title }}"
  vars:
    pipeline: "{{ item }}"
  tags: graylog-config, pipelines

- name: create pipelines
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/system/pipelines/pipeline"
    method: POST
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Content-Type: application/json
    body_format: json
    body:
      title: "{{ item.title }}"
      description: "{{ item.description | default('') }}"
      source: "{{ pipeline_sources[item.title] }}"
    status_code: [200, 201]
  loop: "{{ graylog_pipelines }}"
  loop_control:
    label: "{{ item.title }}"
  when: item.title not in existing_pipeline_titles
  register: created_pipelines
  tags: graylog-config, pipelines

- name: refresh pipeline list after creation
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/system/pipelines/pipeline"
    method: GET
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Accept: application/json
    status_code: 200
  register: all_pipelines
  tags: graylog-config, pipelines

- name: build pipeline ID lookup
  ansible.builtin.set_fact:
    pipeline_id_map: "{{ all_pipelines.json | items2dict(key_name='title', value_name='id') }}"
  tags: graylog-config, pipelines

# =============================================================================
# Pipeline to Stream Connections
# =============================================================================
# current_connections is fetched but not consulted below; connections are
# re-posted on every run.
- name: get current pipeline connections
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/system/pipelines/connections"
    method: GET
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Accept: application/json
    status_code: 200
  register: current_connections
  tags: graylog-config, pipelines

# NOTE(review): to_stream posts a single pipeline_id per call — if it replaces
# (rather than appends to) a stream's pipeline set, a stream listed under two
# pipelines would keep only the last one. Confirm against the Graylog API.
# ignore_errors keeps a single failed connection from aborting the play, but
# also hides genuine API errors.
- name: connect pipelines to streams
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/system/pipelines/connections/to_stream"
    method: POST
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Content-Type: application/json
    body_format: json
    body:
      stream_id: "{{ stream_id_map[item.1] }}"
      pipeline_ids:
        - "{{ pipeline_id_map[item.0.pipeline] }}"
    status_code: [200, 201]
  loop: "{{ graylog_pipeline_connections | subelements('streams') }}"
  loop_control:
    label: "{{ item.0.pipeline }} -> {{ item.1 }}"
  when:
    - item.0.pipeline in pipeline_id_map
    - item.1 in stream_id_map
  ignore_errors: true
  tags: graylog-config, pipelines

View File

@@ -0,0 +1,127 @@
---
# Graylog Stream Management via REST API
# Idempotent: checks for existing streams (matched by title) before creating.
# Auth: API token as basic-auth username, literal "token" password.
# Side effect for later includes: sets stream_id_map (title -> id), which
# pipelines.yml relies on.

- name: get existing streams
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/streams"
    method: GET
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Accept: application/json
    status_code: 200
  register: existing_streams
  tags: graylog-config, streams

- name: build list of existing stream titles
  ansible.builtin.set_fact:
    existing_stream_titles: "{{ existing_streams.json.streams | map(attribute='title') | list }}"
    existing_stream_map: "{{ existing_streams.json.streams | items2dict(key_name='title', value_name='id') }}"
  tags: graylog-config, streams

# NOTE(review): templated values like "{{ item.remove_from_default | default(true) }}"
# serialize as JSON strings, not native booleans, unless jinja2 native types
# are enabled — confirm the Graylog API coerces them as expected.
- name: create streams
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/streams"
    method: POST
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Content-Type: application/json
    body_format: json
    body:
      title: "{{ item.title }}"
      description: "{{ item.description | default('') }}"
      index_set_id: "{{ item.index_set_id | default(graylog_default_index_set) }}"
      remove_matches_from_default_stream: "{{ item.remove_from_default | default(true) }}"
    status_code: [200, 201]
  loop: "{{ graylog_streams }}"
  loop_control:
    label: "{{ item.title }}"
  when: item.title not in existing_stream_titles
  register: created_streams
  tags: graylog-config, streams

- name: refresh stream list after creation
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/streams"
    method: GET
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Accept: application/json
    status_code: 200
  register: all_streams
  tags: graylog-config, streams

- name: build stream ID lookup
  ansible.builtin.set_fact:
    stream_id_map: "{{ all_streams.json.streams | items2dict(key_name='title', value_name='id') }}"
  tags: graylog-config, streams

- name: get existing rules for each stream
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/streams/{{ stream_id_map[item.title] }}/rules"
    method: GET
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Accept: application/json
    status_code: 200
  loop: "{{ graylog_streams }}"
  loop_control:
    label: "{{ item.title }}"
  when: item.title in stream_id_map
  register: stream_rules
  tags: graylog-config, streams

# The second `when` condition finds the GET result for this stream in
# stream_rules.results and only creates the rule if no existing rule has the
# same field AND value (idempotency at rule granularity).
- name: create stream rules
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/streams/{{ stream_id_map[item.0.title] }}/rules"
    method: POST
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
      Content-Type: application/json
    body_format: json
    body:
      field: "{{ item.1.field }}"
      value: "{{ item.1.value }}"
      type: "{{ item.1.type | default(1) }}"
      inverted: "{{ item.1.inverted | default(false) }}"
      description: "{{ item.1.description | default('') }}"
    status_code: [200, 201]
  loop: "{{ graylog_streams | subelements('rules', skip_missing=True) }}"
  loop_control:
    label: "{{ item.0.title }} - {{ item.1.field }}:{{ item.1.value }}"
  when:
    - item.0.title in stream_id_map
    - stream_rules.results | selectattr('item.title', 'equalto', item.0.title) | map(attribute='json.stream_rules') | first | default([]) | selectattr('field', 'equalto', item.1.field) | selectattr('value', 'equalto', item.1.value) | list | length == 0
  tags: graylog-config, streams

# Streams are created paused; resume every managed stream. ignore_errors
# covers streams that are already running (and hides other resume failures).
- name: start streams
  ansible.builtin.uri:
    url: "{{ graylog_api_url }}/streams/{{ stream_id_map[item.title] }}/resume"
    method: POST
    user: "{{ graylog_api_token }}"
    password: token
    force_basic_auth: true
    headers:
      X-Requested-By: ansible
    status_code: [200, 204]
  loop: "{{ graylog_streams }}"
  loop_control:
    label: "{{ item.title }}"
  when: item.title in stream_id_map
  ignore_errors: true
  tags: graylog-config, streams

View File

@@ -0,0 +1,8 @@
{# Renders a Graylog pipeline definition in the pipeline DSL.
   Expects a `pipeline` variable (mapping with `title` and `stages`); each
   stage needs `stage` (number) and `rules` (list of rule titles), with
   `match` defaulting to EITHER. Rendered by pipelines.yml via
   lookup('template', ...). #}
pipeline "{{ pipeline.title }}"
{% for stage in pipeline.stages %}
stage {{ stage.stage }} match {{ stage.match | default('EITHER') }}
{% for rule in stage.rules %}
rule "{{ rule }}"
{% endfor %}
{% endfor %}
end

View File

@@ -25,6 +25,7 @@
# Legacy volume mounts removed - Caddy manages certificates automatically
# Mount static site directories
- "/usr/local/share/fulfillr-site:/usr/local/share/fulfillr-site:ro"
- "/usr/local/share/test-site:/srv/test-site:ro"
env:
CADDY_ADMIN: "0.0.0.0:2019"
restart_policy: always

View File

@@ -27,6 +27,17 @@
- caddy
- ssl
# Document root for the test.debyl.io static site; the Caddy container mounts
# this path (read-only) and serves it. Owned by the deploy user so content can
# be rsynced/copied without privilege escalation.
- name: create test-site directory
  become: true
  ansible.builtin.file:
    path: /usr/local/share/test-site
    state: directory
    owner: "{{ ansible_user }}"
    group: "{{ ansible_user }}"
    mode: '0755'
  tags:
    - caddy
- name: deploy caddyfile
become: true
ansible.builtin.template:

View File

@@ -346,3 +346,26 @@
format json
}
}
# ============================================================================
# TEST/STAGING SITES
# ============================================================================
# Test Site - test.debyl.io (Public static site hosting, no caching)
# Serves the directory mounted at /srv/test-site in the Caddy container
# (host path /usr/local/share/test-site). This file is an Ansible template:
# {{ caddy_log_level }} is substituted at deploy time.
test.debyl.io {
	import common_headers
	root * /srv/test-site
	# SPA-style fallback: unresolved paths serve /index.html
	try_files {path} {path}/ /index.html
	file_server
	# Disable all caching for test sites
	header Cache-Control "no-store, no-cache, must-revalidate, max-age=0"
	header Pragma "no-cache"
	log {
		output file /var/log/caddy/test.log
		format json
		level {{ caddy_log_level }}
	}
}

Binary file not shown.

401
scripts/steam-workshop-query.py Executable file
View File

@@ -0,0 +1,401 @@
#!/usr/bin/env python3
"""
Steam Workshop Query Tool for Project Zomboid Mods
Queries Steam API to get mod details including correct Mod IDs with special characters.
Useful for generating properly formatted mod lists for Build 42 servers.
Usage:
# Query individual workshop items (semicolon-separated)
python steam-workshop-query.py "ID1;ID2;ID3"
# Query from a Steam Workshop collection
python steam-workshop-query.py --collection 3625776190
python steam-workshop-query.py --collection "https://steamcommunity.com/sharedfiles/filedetails?id=3625776190"
# Output formats
--json Output raw JSON data
--ansible Output workshop_items and mod_ids strings for ansible config
--report Human-readable report (default)
Examples:
python steam-workshop-query.py "3171167894;3330403100" --ansible
python steam-workshop-query.py --collection 3625776190 --report
"""
import requests
import json
import sys
import time
import re
import argparse
from typing import List, Dict, Optional, Tuple
from datetime import datetime
# Steam Web API endpoints (anonymous POST requests; no API key required).
STEAM_API_DETAILS = "https://api.steampowered.com/ISteamRemoteStorage/GetPublishedFileDetails/v1/"
STEAM_API_COLLECTION = "https://api.steampowered.com/ISteamRemoteStorage/GetCollectionDetails/v1/"
BATCH_SIZE = 50  # Conservative batch size to avoid rate limits
DELAY_BETWEEN_BATCHES = 1.0  # seconds
def get_collection_items(collection_id: str) -> List[str]:
    """Return the workshop item IDs contained in a Steam Workshop collection.

    Posts to the GetCollectionDetails endpoint; warnings for missing or
    errored collections go to stderr and yield an empty/partial list.
    """
    payload = {"collectioncount": 1, "publishedfileids[0]": collection_id}
    response = requests.post(STEAM_API_COLLECTION, data=payload)
    response.raise_for_status()

    details = response.json().get("response", {}).get("collectiondetails", [])
    if not details:
        print(f"Warning: No collection found with ID {collection_id}", file=sys.stderr)
        return []

    item_ids: List[str] = []
    for coll in details:
        # Steam reports per-collection status; result == 1 means OK.
        if coll.get("result") != 1:
            print(f"Warning: Collection {collection_id} returned error result", file=sys.stderr)
            continue
        item_ids.extend(
            child["publishedfileid"]
            for child in coll.get("children", [])
            if child.get("publishedfileid")
        )
    return item_ids
def query_workshop_items_batch(item_ids: List[str]) -> List[Dict]:
    """Query the Steam API for one batch of workshop item details."""
    # The endpoint expects indexed form fields: publishedfileids[0], [1], ...
    payload: Dict = {"itemcount": len(item_ids)}
    payload.update({f"publishedfileids[{i}]": item_id for i, item_id in enumerate(item_ids)})

    response = requests.post(STEAM_API_DETAILS, data=payload)
    response.raise_for_status()
    return response.json().get("response", {}).get("publishedfiledetails", [])
def query_all_workshop_items(item_ids: List[str]) -> List[Dict]:
    """Query Steam API for all workshop items, handling batching.

    Splits item_ids into BATCH_SIZE chunks, queries each via
    query_workshop_items_batch, and sleeps DELAY_BETWEEN_BATCHES seconds
    between chunks. Progress is reported on stderr.
    """
    all_items = []
    for i in range(0, len(item_ids), BATCH_SIZE):
        batch = item_ids[i:i + BATCH_SIZE]
        print(f"Querying batch {i // BATCH_SIZE + 1} ({len(batch)} items)...", file=sys.stderr)
        items = query_workshop_items_batch(batch)
        all_items.extend(items)
        # Delay between batches to avoid rate limiting (skipped after the last batch)
        if i + BATCH_SIZE < len(item_ids):
            time.sleep(DELAY_BETWEEN_BATCHES)
    return all_items
def extract_mod_id(item: Dict) -> Optional[str]:
    """
    Extract Mod ID(s) from a workshop item's description.

    PZ mods typically include 'Mod ID: xxx' in their description. Some mods
    list multiple Mod IDs on separate lines, or comma/semicolon separated on
    one line; lines mixing both separators are also handled.

    Returns a semicolon-joined string of unique Mod IDs in first-seen order,
    or None if the description contains no 'Mod ID:' tag.
    """
    description = item.get("description", "")
    # Find ALL "Mod ID: xxx" patterns in the description (one per line).
    matches = re.findall(r'Mod ID:\s*([^\r\n]+)', description, re.IGNORECASE)
    if not matches:
        return None
    all_mod_ids = []
    for match in matches:
        # Trailing periods are prose punctuation, not part of the ID.
        mod_id_str = match.strip().rstrip('.')
        # Split on comma AND semicolon in one pass so lines that mix both
        # separators (e.g. "a, b; c") are fully split; the previous
        # implementation only split on whichever separator it saw first.
        all_mod_ids.extend(m.strip() for m in re.split(r'[;,]', mod_id_str))
    # Remove empty strings and duplicates while preserving order.
    seen = set()
    unique_ids = []
    for mod_id in all_mod_ids:
        if mod_id and mod_id not in seen:
            seen.add(mod_id)
            unique_ids.append(mod_id)
    return ';'.join(unique_ids) if unique_ids else None
def check_b42_compatible(item: Dict) -> Tuple[bool, str]:
    """
    Heuristically check whether a mod looks Build-42 compatible.

    Scans the lowercased title and workshop tags for B42 markers, then for
    explicit "B41 only" wording. Returns (is_compatible, reason).
    """
    title = item.get("title", "").lower()
    tag_text = " ".join(t.get("tag", "").lower() for t in item.get("tags", []))

    # Markers that indicate B42 support in the title or tags.
    b42_markers = (
        r'\bb42\b',
        r'build\s*42',
        r'\b42\.\d+',
        r'\[b42\]',
        r'\(b42\)',
    )
    if any(re.search(marker, title) or re.search(marker, tag_text) for marker in b42_markers):
        return True, "B42 mentioned in title/tags"

    # Explicit "B41 only" wording suggests the mod is NOT compatible.
    if re.search(r'\bb41\b.*only', title) or re.search(r'build\s*41\s*only', title):
        return False, "B41 only"

    return False, "No B42 indicator found"
def has_special_characters(text: str) -> bool:
    """Check if text contains special characters that need attention.

    The flagged set matches the original: ' " ! & ( )
    """
    return any(symbol in text for symbol in "'\"!&()")
def extract_collection_id(url_or_id: str) -> str:
    """Pull the numeric collection ID out of a workshop URL; pass bare IDs through."""
    found = re.search(r'[?&]id=(\d+)', url_or_id)
    if found:
        return found.group(1)
    return url_or_id
def format_timestamp(unix_ts: int) -> str:
    """Render a Unix timestamp as YYYY-MM-DD; 'Unknown' for falsy input.

    NOTE(review): uses datetime.fromtimestamp, i.e. the host's local
    timezone, so the printed date can differ by machine — confirm intended.
    """
    if unix_ts:
        return datetime.fromtimestamp(unix_ts).strftime("%Y-%m-%d")
    return "Unknown"
def process_items(items: List[Dict]) -> Dict:
    """
    Process workshop items and extract relevant information.

    Returns a dict with keys:
      items       - per-item entries (workshop_id, title, mod_id, B42 status,
                    last_updated, has_special_chars, result_code)
      duplicates  - mod_id -> [workshop_ids] for mod_ids seen more than once
      issues      - human-readable problem strings (API errors, missing IDs,
                    special characters)
      total_count - number of raw items received
      valid_count - number of items that yielded a Mod ID
    """
    processed = []
    duplicates = {}
    issues = []
    for item in items:
        workshop_id = item.get("publishedfileid", "unknown")
        title = item.get("title", "Unknown")
        mod_id = extract_mod_id(item)
        b42_compat, b42_reason = check_b42_compatible(item)
        last_updated = item.get("time_updated", 0)
        result_code = item.get("result", 0)
        entry = {
            "workshop_id": workshop_id,
            "title": title,
            "mod_id": mod_id,
            "b42_compatible": b42_compat,
            "b42_reason": b42_reason,
            "last_updated": format_timestamp(last_updated),
            "has_special_chars": has_special_characters(mod_id or ""),
            "result_code": result_code,
        }
        # Track duplicates by mod_id. The full (possibly semicolon-joined)
        # string is the key, so two mods sharing only one of several IDs are
        # NOT flagged here.
        if mod_id:
            if mod_id in duplicates:
                duplicates[mod_id].append(workshop_id)
            else:
                duplicates[mod_id] = [workshop_id]
        # Track issues (Steam API result 1 == OK)
        if result_code != 1:
            issues.append(f"Workshop item {workshop_id} returned error (result={result_code})")
        if not mod_id:
            issues.append(f"Workshop item {workshop_id} ({title}) has no Mod ID tag")
        if entry["has_special_chars"]:
            issues.append(f"Mod ID '{mod_id}' contains special characters")
        processed.append(entry)
    # Find actual duplicates (mod_id appearing more than once)
    duplicate_mod_ids = {k: v for k, v in duplicates.items() if len(v) > 1}
    return {
        "items": processed,
        "duplicates": duplicate_mod_ids,
        "issues": issues,
        "total_count": len(items),
        "valid_count": len([i for i in processed if i["mod_id"]]),
    }
def output_report(data: Dict) -> None:
    """Print a human-readable analysis report to stdout.

    Sections: summary counts, duplicate Mod IDs (if any), issues (if any),
    and the full per-item mod list. `data` is the dict from process_items.
    """
    print("\n" + "=" * 80)
    print("STEAM WORKSHOP MOD ANALYSIS REPORT")
    print("=" * 80)
    print(f"\nTotal items: {data['total_count']}")
    print(f"Valid items (with Mod ID): {data['valid_count']}")
    if data["duplicates"]:
        print(f"\n{'=' * 40}")
        print("DUPLICATE MOD IDs:")
        print(f"{'=' * 40}")
        for mod_id, workshop_ids in data["duplicates"].items():
            print(f" {mod_id}: {', '.join(workshop_ids)}")
    if data["issues"]:
        print(f"\n{'=' * 40}")
        print("ISSUES:")
        print(f"{'=' * 40}")
        for issue in data["issues"]:
            print(f" - {issue}")
    print(f"\n{'=' * 40}")
    print("MOD LIST:")
    print(f"{'=' * 40}")
    for item in data["items"]:
        # [???] means no positive B42 indicator was found, not proven-incompatible
        b42_status = "[B42]" if item["b42_compatible"] else "[???]"
        special = " [SPECIAL CHARS]" if item["has_special_chars"] else ""
        mod_id_display = item["mod_id"] or "<NO MOD ID>"
        print(f"\n Workshop: {item['workshop_id']}")
        print(f" Title: {item['title']}")
        print(f" Mod ID: {mod_id_display}{special}")
        print(f" Status: {b42_status} {item['b42_reason']}")
        print(f" Updated: {item['last_updated']}")
def output_ansible(data: Dict) -> None:
    """Print ansible-ready workshop_items / mod_ids strings for zomboid_mods.

    Deduplicates workshop IDs and Mod IDs (first occurrence wins), prefixes
    each Mod ID with a backslash (Build 42 format), and emits a YAML snippet
    followed by commented warnings for duplicates and issues.
    """
    # Get unique, valid mod IDs (preserving order, removing duplicates)
    seen_workshop = set()
    seen_mod_ids = set()
    workshop_items = []
    mod_ids = []
    for item in data["items"]:
        workshop_id = item["workshop_id"]
        mod_id_str = item["mod_id"]
        # Skip if we've seen this workshop item
        if workshop_id in seen_workshop:
            continue
        seen_workshop.add(workshop_id)
        workshop_items.append(workshop_id)
        # Handle mod_id which may contain multiple IDs separated by semicolon
        if mod_id_str:
            for mod_id in mod_id_str.split(';'):
                mod_id = mod_id.strip()
                if mod_id and mod_id not in seen_mod_ids:
                    seen_mod_ids.add(mod_id)
                    mod_ids.append(mod_id)
    # Format for Build 42 (backslash prefix)
    workshop_str = ";".join(workshop_items)
    mod_ids_str = ";".join(f"\\{mid}" for mid in mod_ids)
    print("\n# Ansible Configuration for zomboid_mods")
    print("# Copy these values to ansible/roles/podman/defaults/main.yml")
    print("")
    # NOTE(review): YAML requires consistent child indentation — verify the
    # leading spaces below produce valid YAML when pasted (block scalars must
    # be indented deeper than their key).
    print("zomboid_mods:")
    print(" workshop_items: >-")
    print(f" {workshop_str}")
    print(" mod_ids: >-")
    print(f" {mod_ids_str}")
    if data["duplicates"]:
        print("\n# WARNING: The following Mod IDs had duplicates (kept first occurrence):")
        for mod_id, workshop_ids in data["duplicates"].items():
            print(f"# {mod_id}: {', '.join(workshop_ids)}")
    if data["issues"]:
        print("\n# Issues found:")
        for issue in data["issues"]:
            print(f"# - {issue}")
def output_json(data: Dict) -> None:
    """Dump the processed analysis dict as pretty-printed JSON to stdout."""
    print(json.dumps(data, indent=2))
def main():
    """CLI entry point: parse arguments, fetch mod data, emit the chosen format.

    Input is either --collection (ID or URL) or a positional semicolon-
    separated list of workshop IDs; with neither, help is printed and the
    process exits 1. Output format: --json, --ansible, or the default report
    (the --report flag is accepted but is effectively the fallback).
    """
    parser = argparse.ArgumentParser(
        description="Query Steam Workshop for Project Zomboid mod details",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__
    )
    parser.add_argument(
        "workshop_ids",
        nargs="?",
        help="Semicolon-separated workshop IDs (e.g., 'ID1;ID2;ID3')"
    )
    parser.add_argument(
        "--collection", "-c",
        help="Steam Workshop collection ID or URL"
    )
    parser.add_argument(
        "--json", "-j",
        action="store_true",
        help="Output raw JSON data"
    )
    parser.add_argument(
        "--ansible", "-a",
        action="store_true",
        help="Output ansible-ready configuration"
    )
    parser.add_argument(
        "--report", "-r",
        action="store_true",
        help="Output human-readable report (default)"
    )
    args = parser.parse_args()
    # Determine input source (--collection takes precedence over positional IDs)
    if args.collection:
        collection_id = extract_collection_id(args.collection)
        print(f"Fetching collection {collection_id}...", file=sys.stderr)
        item_ids = get_collection_items(collection_id)
        if not item_ids:
            print("Error: No items found in collection", file=sys.stderr)
            sys.exit(1)
        print(f"Found {len(item_ids)} items in collection", file=sys.stderr)
    elif args.workshop_ids:
        item_ids = [id.strip() for id in args.workshop_ids.split(";") if id.strip()]
    else:
        parser.print_help()
        sys.exit(1)
    # Query Steam API (progress goes to stderr so stdout stays machine-readable)
    print(f"Querying {len(item_ids)} workshop items...", file=sys.stderr)
    items = query_all_workshop_items(item_ids)
    print(f"Retrieved {len(items)} item details", file=sys.stderr)
    # Process items
    data = process_items(items)
    # Output based on format
    if args.json:
        output_json(data)
    elif args.ansible:
        output_ansible(data)
    else:
        output_report(data)


if __name__ == "__main__":
    main()