nostrdvm/nostr_dvm/tasks/content_discovery_update_db_only.py

309 lines
12 KiB
Python

import json
import json
import os
from datetime import timedelta
from itertools import islice
from nostr_sdk import Timestamp, PublicKey, Keys, Options, SecretKey, NostrSigner, NostrDatabase, \
ClientBuilder, Filter, SyncOptions, SyncDirection, init_logger, LogLevel, Kind, \
RelayLimits, RelayFilteringMode
from nostr_dvm.interfaces.dvmtaskinterface import DVMTaskInterface, process_venv
from nostr_dvm.utils import definitions
from nostr_dvm.utils.admin_utils import AdminConfig
from nostr_dvm.utils.database_utils import init_db
from nostr_dvm.utils.definitions import EventDefinitions
from nostr_dvm.utils.dvmconfig import DVMConfig, build_default_config
from nostr_dvm.utils.nip88_utils import NIP88Config, check_and_set_d_tag_nip88, check_and_set_tiereventid_nip88
from nostr_dvm.utils.nip89_utils import NIP89Config, check_and_set_d_tag, create_amount_tag
from nostr_dvm.utils.output_utils import post_process_list_to_events
from nostr_dvm.utils.wot_utils import build_wot_network
"""
This File contains a Module to update the database for content discovery dvms
Accepted Inputs: none
Outputs: A list of events
Params: None
"""
class DicoverContentDBUpdateScheduler(DVMTaskInterface):
KIND: Kind = EventDefinitions.KIND_NIP90_CONTENT_DISCOVERY
TASK: str = "update-db-on-schedule"
FIX_COST: float = 0
dvm_config: DVMConfig
request_form = None
last_schedule: int = 0
min_reactions = 2
db_since = 10 * 3600
db_name = "db/nostr_default_recent_notes.db"
search_list = []
avoid_list = []
must_list = []
personalized = False
result = ""
database = None
wot_counter = 0
max_db_size = 280
async def init_dvm(self, name, dvm_config: DVMConfig, nip89config: NIP89Config, nip88config: NIP88Config = None,
admin_config: AdminConfig = None, options=None):
# Generate Generic request form for dvms that provide generic results (e.g only a calculation per update,
# not per call)
self.request_form = {"jobID": "generic"}
if dvm_config.DATABASE is not None:
self.database = dvm_config.DATABASE
opts = {
"max_results": 200,
}
self.request_form['options'] = json.dumps(opts)
dvm_config.SCRIPT = os.path.abspath(__file__)
if self.options.get("db_name"):
self.db_name = self.options.get("db_name")
if self.options.get("db_since"):
self.db_since = int(self.options.get("db_since"))
if self.options.get("max_db_size"):
self.max_db_size = int(self.options.get("max_db_size"))
use_logger = False
if use_logger:
init_logger(LogLevel.DEBUG)
if self.dvm_config.UPDATE_DATABASE:
await self.sync_db()
async def is_input_supported(self, tags, client=None, dvm_config=None):
for tag in tags:
if tag.as_vec()[0] == 'i':
input_value = tag.as_vec()[1]
input_type = tag.as_vec()[2]
if input_type != "text":
return False
return True
async def create_request_from_nostr_event(self, event, client=None, dvm_config=None):
self.dvm_config = dvm_config
request_form = {"jobID": event.id().to_hex()}
# default values
max_results = 200
for tag in event.tags().to_vec():
if tag.as_vec()[0] == 'i':
input_type = tag.as_vec()[2]
elif tag.as_vec()[0] == 'param':
param = tag.as_vec()[1]
if param == "max_results": # check for param type
max_results = int(tag.as_vec()[2])
options = {
"max_results": max_results,
}
request_form['options'] = json.dumps(options)
self.request_form = request_form
return request_form
async def process(self, request_form):
return "I don't return results, I just update the DB."
async def post_process(self, result, event):
"""Overwrite the interface function to return a social client readable format, if requested"""
for tag in event.tags().to_vec():
if tag.as_vec()[0] == 'output':
format = tag.as_vec()[1]
if format == "text/plain": # check for output type
result = post_process_list_to_events(result)
# if not text/plain, don't post-process
return result
async def schedule(self, dvm_config):
if dvm_config.SCHEDULE_UPDATES_SECONDS == 0:
return 0
else:
if Timestamp.now().as_secs() >= self.last_schedule + dvm_config.SCHEDULE_UPDATES_SECONDS:
if self.dvm_config.UPDATE_DATABASE:
await self.sync_db()
self.last_schedule = Timestamp.now().as_secs()
return 1
async def sync_db(self):
try:
relaylimits = RelayLimits.disable()
opts = (Options().relay_limits(relaylimits))
if self.dvm_config.WOT_FILTERING:
opts = opts.filtering_mode(RelayFilteringMode.WHITELIST)
sk = SecretKey.parse(self.dvm_config.PRIVATE_KEY)
keys = Keys.parse(sk.to_hex())
if self.database is None:
self.database = await init_db(self.db_name, True, self.max_db_size)
#self.database = NostrDatabase.lmdb(self.db_name)
cli = ClientBuilder().signer(NostrSigner.keys(keys)).database(self.database).opts(opts).build()
for relay in self.dvm_config.SYNC_DB_RELAY_LIST:
await cli.add_relay(relay)
await cli.connect()
if self.dvm_config.WOT_FILTERING and self.wot_counter == 0:
print("Calculating WOT for " + str(self.dvm_config.WOT_BASED_ON_NPUBS))
filtering = cli.filtering()
index_map, G = await build_wot_network(self.dvm_config.WOT_BASED_ON_NPUBS,
depth=self.dvm_config.WOT_DEPTH, max_batch=500,
max_time_request=10, dvm_config=self.dvm_config)
# Do we actually need pagerank here?
# print('computing global pagerank...')
# tic = time.time()
# p_G = nx.pagerank(G, tol=1e-12)
# print("network after pagerank: " + str(len(p_G)))
wot_keys = []
for item in islice(G, len(G)):
key = next((PublicKey.parse(pubkey) for pubkey, id in index_map.items() if id == item),
None)
wot_keys.append(key)
# toc = time.time()
# print(f'finished in {toc - tic} seconds')
await filtering.add_public_keys(wot_keys)
self.wot_counter += 1
# only calculate wot every 10th call
if self.wot_counter >= 10:
self.wot_counter = 0
# Mute public key
# await cli. (self.dvm_config.MUTE)
timestamp_since = Timestamp.now().as_secs() - self.db_since
since = Timestamp.from_secs(timestamp_since)
filter1 = Filter().kinds(
[definitions.EventDefinitions.KIND_NOTE, definitions.EventDefinitions.KIND_REACTION,
definitions.EventDefinitions.KIND_ZAP]).since(since) # Notes, reactions, zaps
if self.dvm_config.LOGLEVEL.value >= LogLevel.DEBUG.value:
print("[" + self.dvm_config.IDENTIFIER + "] Syncing notes of the last " + str(
self.db_since) + " seconds.. this might take a while..")
dbopts = SyncOptions().direction(SyncDirection.DOWN)
await cli.sync(filter1, dbopts)
await cli.database().delete(Filter().until(Timestamp.from_secs(
Timestamp.now().as_secs() - self.db_since))) # Clear old events so db doesn't get too full.
await cli.shutdown()
if self.dvm_config.LOGLEVEL.value >= LogLevel.DEBUG.value:
print(
"[" + self.dvm_config.IDENTIFIER + "] Done Syncing Notes of the last " + str(
self.db_since) + " seconds..")
except Exception as e:
print(e)
# We build an example here that we can call by either calling this file directly from the main directory,
# or by adding it to our playground. You can call the example and adjust it to your needs or redefine it in the
# playground or elsewhere
def build_example(name, identifier, admin_config, options, image, description, update_rate=600, cost=0,
processing_msg=None, update_db=True):
dvm_config = build_default_config(identifier)
dvm_config.USE_OWN_VENV = False
dvm_config.SHOWLOG = True
dvm_config.SCHEDULE_UPDATES_SECONDS = update_rate # Every 10 minutes
dvm_config.UPDATE_DATABASE = update_db
# Activate these to use a subscription based model instead
# dvm_config.SUBSCRIPTION_REQUIRED = True
# dvm_config.SUBSCRIPTION_DAILY_COST = 1
dvm_config.FIX_COST = cost
dvm_config.CUSTOM_PROCESSING_MESSAGE = processing_msg
admin_config.LUD16 = dvm_config.LN_ADDRESS
# Add NIP89
nip89info = {
"name": name,
"picture": image,
"about": description,
"lud16": dvm_config.LN_ADDRESS,
"supportsEncryption": True,
"acceptsNutZaps": dvm_config.ENABLE_NUTZAP,
"personalized": False,
"amount": create_amount_tag(cost),
"nip90Params": {
"max_results": {
"required": False,
"values": [],
"description": "The number of maximum results to return (default currently 100)"
}
}
}
nip89config = NIP89Config()
nip89config.DTAG = check_and_set_d_tag(identifier, name, dvm_config.PRIVATE_KEY, nip89info["picture"])
nip89config.CONTENT = json.dumps(nip89info)
return DicoverContentDBUpdateScheduler(name=name, dvm_config=dvm_config, nip89config=nip89config,
admin_config=admin_config, options=options)
def build_example_subscription(name, identifier, admin_config, options, image, description, processing_msg=None,
update_db=True):
dvm_config = build_default_config(identifier)
dvm_config.USE_OWN_VENV = False
dvm_config.SHOWLOG = True
dvm_config.SCHEDULE_UPDATES_SECONDS = 600 # Every 10 minutes
dvm_config.UPDATE_DATABASE = update_db
# Activate these to use a subscription based model instead
dvm_config.FIX_COST = 0
dvm_config.CUSTOM_PROCESSING_MESSAGE = processing_msg
admin_config.LUD16 = dvm_config.LN_ADDRESS
# Add NIP89
nip89info = {
"name": name,
"picture": image,
"about": description,
"lud16": dvm_config.LN_ADDRESS,
"supportsEncryption": True,
"acceptsNutZaps": dvm_config.ENABLE_NUTZAP,
"subscription": True,
"personalized": False,
"nip90Params": {
"max_results": {
"required": False,
"values": [],
"description": "The number of maximum results to return (default currently 100)"
}
}
}
nip89config = NIP89Config()
nip89config.DTAG = check_and_set_d_tag(identifier, name, dvm_config.PRIVATE_KEY, nip89info["picture"])
nip89config.CONTENT = json.dumps(nip89info)
nip88config = NIP88Config()
nip88config.DTAG = check_and_set_d_tag_nip88(identifier, name, dvm_config.PRIVATE_KEY, nip89info["picture"])
nip88config.TIER_EVENT = check_and_set_tiereventid_nip88(identifier, "1")
nip89config.NAME = name
nip88config.IMAGE = nip89info["picture"]
nip88config.TITLE = name
nip88config.AMOUNT_DAILY = 100
nip88config.AMOUNT_MONTHLY = 2000
nip88config.CONTENT = "Subscribe to the DVM for unlimited use during your subscription"
nip88config.PERK1DESC = "Unlimited requests"
nip88config.PERK2DESC = "Support NostrDVM & NostrSDK development"
nip88config.PAYMENT_VERIFIER_PUBKEY = "5b5c045ecdf66fb540bdf2049fe0ef7f1a566fa427a4fe50d400a011b65a3a7e"
# admin_config.FETCH_NIP88 = True
# admin_config.EVENTID = "63a791cdc7bf78c14031616963105fce5793f532bb231687665b14fb6d805fdb"
# admin_config.PRIVKEY = dvm_config.PRIVATE_KEY
return DicoverContentDBUpdateScheduler(name=name, dvm_config=dvm_config, nip89config=nip89config,
nip88config=nip88config,
admin_config=admin_config,
options=options)
if __name__ == '__main__':
process_venv(DicoverContentDBUpdateScheduler)