Docker Hub can be crawled now; email and notifications are still pending

This commit is contained in:
asamihassan 2022-02-06 17:21:55 +05:00
parent 2f7220cad1
commit 503dc8dd7e
9 changed files with 80 additions and 28 deletions

Binary file not shown.

View file

@ -0,0 +1,25 @@
# Generated by Django 3.0 on 2022-02-06 09:05
import datetime
from django.db import migrations, models
from django.utils.timezone import utc
class Migration(migrations.Migration):
    """Alter ``last_pushed`` and ``last_updated`` on ``dockerhubcrawler``.

    Both columns remain optional (``null=True, blank=True``) and receive a
    fixed, timezone-aware default of 2019-05-13 09:05:38 UTC.
    """

    # Must be applied after the previous migration of the same app.
    dependencies = [('dockerhubCrawler', '0008_auto_20220205_2121')]

    # NOTE(review): the literal defaults below equal this migration's
    # generation time minus 1000 days, so they are presumably a snapshot of a
    # default computed at import time in the model -- confirm against models.py.
    operations = [
        migrations.AlterField(
            model_name='dockerhubcrawler',
            name='last_pushed',
            field=models.DateTimeField(
                null=True,
                blank=True,
                default=datetime.datetime(
                    year=2019, month=5, day=13,
                    hour=9, minute=5, second=38, microsecond=555224,
                    tzinfo=utc,
                ),
            ),
        ),
        migrations.AlterField(
            model_name='dockerhubcrawler',
            name='last_updated',
            field=models.DateTimeField(
                null=True,
                blank=True,
                default=datetime.datetime(
                    year=2019, month=5, day=13,
                    hour=9, minute=5, second=38, microsecond=555256,
                    tzinfo=utc,
                ),
            ),
        ),
    ]

View file

@ -1,3 +1,4 @@
from operator import le
from django.db import models
from django.utils import timezone
from datetime import timedelta
@ -8,18 +9,25 @@ class DockerhubCrawler(models.Model):
# Docker Hub page URL of the repository, as entered by the user.
url = models.CharField(max_length=300)
# Tags endpoint derived from ``url``; filled in by ``save()``.
api_url = models.CharField(max_length=1000, null=True, blank=True)

def _initial_timestamp():
    """Return the moment 1000 days before the current time.

    Used only as the ``default`` of the two timestamp fields below. Passing
    the function itself (not its result) makes Django evaluate it each time a
    new object is created. The previous ``timezone.now() + timedelta(...)``
    expression ran once at import time, so the default was frozen for the
    life of the process and looked "changed" on every ``makemigrations`` run.
    A named function is used rather than a lambda because migrations cannot
    serialize lambdas.
    """
    return timezone.now() - timedelta(days=1000)

# Defined after the helper so the name is already bound when the fields are
# evaluated. Both stay optional; the far-in-the-past default means any real
# timestamp compares as newer.
last_pushed = models.DateTimeField(null=True, blank=True, default=_initial_timestamp)
last_updated = models.DateTimeField(null=True, blank=True, default=_initial_timestamp)
def __str__(self):
    """Return the stored repository URL as the display label."""
    label = self.url
    return label
def save(self, *args, **kwargs):
    """Derive ``api_url`` from ``url`` and then persist the row.

    ``api_url`` is set to the Docker Hub v2 tags endpoint of the repository
    named in ``url`` (first page, two tags per page). When no
    ``<namespace>/<repository>`` pair can be found in ``url``, ``api_url`` is
    set to ``None`` instead of a malformed address.

    All positional and keyword arguments are forwarded unchanged to the
    parent ``save()``.
    """
    repository = self._repository_path(self.url)
    if repository is None:
        self.api_url = None
    else:
        self.api_url = (
            'https://hub.docker.com/v2/repositories/'
            + repository
            + '/tags/?page=1&page_size=2'
        )
    super(DockerhubCrawler, self).save(*args, **kwargs)

@staticmethod
def _repository_path(url):
    """Return ``'<namespace>/<repository>'`` for a Docker Hub URL, or ``None``.

    Recognised forms:

    * ``.../r/<namespace>/<repository>[/tags]``
    * ``.../_/<repository>[?tab=tags]`` (mapped to the ``library`` namespace)
    * anything else: the last two path segments, as before.
    """
    # Drop the fragment and query string (e.g. "?tab=tags"), then split. Empty
    # segments from "//" or a trailing "/" are discarded so they cannot end up
    # in the API path.
    path = url.partition('#')[0].partition('?')[0]
    segments = [part for part in path.split('/') if part]

    # "/_/<repo>" on the web site corresponds to "library/<repo>" in the API.
    if '_' in segments:
        tail = segments[segments.index('_') + 1:]
        if tail:
            return 'library/' + tail[0]

    # "/r/<namespace>/<repo>" may be followed by extra pages such as "/tags";
    # only the two segments right after the marker identify the repository.
    if 'r' in segments:
        tail = segments[segments.index('r') + 1:]
        if len(tail) >= 2:
            return tail[0] + '/' + tail[1]

    # Fallback keeps the earlier behaviour of using the last two segments.
    if len(segments) >= 2:
        return segments[-2] + '/' + segments[-1]
    return None
# Research , via Chrome dev tools ;)
#https://hub.docker.com/r/vectorim/element-web/tags
#https://hub.docker.com/v2/repositories/vectorim/element-web/tags/?page=1&page_size=800
#https://hub.docker.com/v2/repositories/library/nextcloud/
#https://hub.docker.com/_/nextcloud?tab=tags

View file

@ -1,7 +1,9 @@
from django.shortcuts import render
import requests
from .models import DockerhubCrawler
from django.db import models
# from django.db import models (Tried doesn't work:Sami)
import datetime
from datetime import timedelta
# Create your views here.
@ -10,34 +12,51 @@ def HomePage(request):
def DockerPage(request):
datatimeobjlast_updatetemp = models.DateTimeField()
datetimeobjlast_updated = models.DateTimeField()
#datatimeobjlast_pushedtemp = models.DateTimeField()
#datetimeobjtag_last_pushed = models.DateTimeField()
datatimeobjlast_pushedtemp = datetime.datetime.now()
datetimeobjtag_last_pushed = datetime.datetime.now()
count = 0
for dhubobj in DockerhubCrawler.objects.all():
resp = requests.get(url=dhubobj.api_url)
data = resp.json()
string_obj = ''
for chunk in data['results']: # looping through the data if updated in greater than prev date time save it
#print(chunk)
if count == 0: # first iteration lets just assign both object values
splits = ((chunk['tag_last_pushed'].split(":")))
#print(splits)
datatimeobjlast_pushedtemp = datetime.datetime(int(splits[0][:4]), int(splits[0][5:7])
,int(splits[0][8:10]), int(splits[0][11:]), int(splits[1][0:3]),
int(splits[2][0:2]), int(splits[2][3:len(splits[2])-1]))
datetimeobjtag_last_pushed = datetime.datetime(int(splits[0][:4]), int(splits[0][5:7])
,int(splits[0][8:10]), int(splits[0][11:]), int(splits[1][0:3]),
int(splits[2][0:2]), int(splits[2][3:len(splits[2])-1]))
string_obj = chunk['tag_last_pushed']
else:
splits = ((chunk['tag_last_pushed'].split(":")))
#print(splits)
#print(splits[0][:4], splits[0][5:7], splits[0][8:10],splits[0][11:] ,splits[1][0:3] , splits[2][0:2], splits[2][3:len(splits[2])-1])
datatimeobjlast_pushedtemp = splits = ((chunk['tag_last_pushed'].split(":")))
datatimeobjlast_pushedtemp = datetime.datetime(int(splits[0][:4]), int(splits[0][5:7])
,int(splits[0][8:10]), int(splits[0][11:]), int(splits[1][0:3]),
int(splits[2][0:2]), int(splits[2][3:len(splits[2])-1]))
if datatimeobjlast_pushedtemp > datetimeobjtag_last_pushed:
datetimeobjtag_last_pushed = datatimeobjlast_pushedtemp
string_obj = chunk['tag_last_pushed']
for chunk in data['results']: # looping through the data if updated in greater than prev date time save it
count = count + 1
if count == 0:
datatimeobjlast_updatetemp = models.DateTimeField(chunk['last_updated'])
datetimeobjlast_updated = models.DateTimeField(chunk['last_updated'])
else:
datatimeobjlast_updatetemp = models.DateTimeField(chunk['last_updated'])
if datatimeobjlast_updatetemp > datetimeobjlast_updated:
datetimeobjlast_updated = datatimeobjlast_updatetemp
count = count + 1
# end for here, string_obj should have latest pushed date by now
if dhubobj.last_updated == None:
# send email guys
dhubobj.last_updated = (datetimeobjlast_updated)
dhubobj.save()
if dhubobj.last_updated < datetimeobjlast_updated:
#send email here for now
dhubobj.last_updated = datetimeobjlast_updated
dhubobj.save()
splits = (string_obj.split(":"))
dhubobj.last_pushed = datetimeobjtag_last_pushed
# here we will notify and send the email
dhubobj.save()
allobjs = DockerhubCrawler.objects.all()

View file

@ -91,7 +91,7 @@
{% for obj in dockerhubobjs %}
<li>{{ obj.url }}</li>
<li>{{ obj.last_pushed }}</li>
<li>{{ obj.last_updated }}</li>
{% comment %} <li>{{ obj.last_updated }}</li> {% endcomment %}
<br>
{% endfor %}
</ul>

View file

@ -44,7 +44,7 @@
<header class="header_section">
<div class="container">
<nav class="navbar navbar-expand-lg custom_nav-container ">
<a class="navbar-brand" href="index.html">
<a class="navbar-brand" href="/">
<span>
CrawlerApp
</span>