import time

# serial communication params
SERIAL_PORT = "/dev/ttyUSB0"
DEFAULT_BAUD_RATE = 9600


class ArduinoControlService:
    """Drive a two-state (0/1) output on an Arduino over a serial port.

    The tracked state starts at 0; nothing is sent to the device until
    ``control`` is called. Each call writes the new state as the text
    ``"0"`` or ``"1"``. Instances work as context managers so the port is
    closed even when an error occurs.
    """

    def __init__(self, port=SERIAL_PORT, baud_rate=DEFAULT_BAUD_RATE):
        """
        Opens the serial port.
        :param port: serial device name (str)
        :param baud_rate: serial speed (int)
        """
        # Imported here so that importing this module (and exercising the
        # state logic) does not require pyserial to be installed.
        import serial

        self._controller = serial.Serial(port, baud_rate)
        self._state = 0

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.dispose()

    # public methods
    def get_state(self):
        """
        Returns output state.
        :return: output state 0/1
        """
        return self._state

    def control(self, state):
        """
        Control arduino writing through serial port. Output state is written as str.
        :param state: value that determines output state - one of the following values (`switch`, `power off`,
        `power on`) (str)
        :raises ValueError: for any other value; nothing is written in that case
        :return: void method
        """
        print("Calling arduino control method with params: [state = {}]".format(state))
        self._set_state(state)
        self._controller.write(str(self._state).encode())

    def dispose(self):
        """
        Closes the serial port.
        :return: void method
        """
        self._controller.close()

    # private methods
    def _state_switch(self):
        """
        Switches the output state.
        :return: void method
        """
        self._state = 1 - self._state

    def _turn_on(self):
        """
        Sets output state to high.
        :return: void method
        """
        self._state = 1

    def _turn_off(self):
        """
        Sets output state to low.
        :return: void method
        """
        self._state = 0

    def _set_state(self, state):
        """
        Sets output state based on state value.
        :param state: value that determines output state - one of the following values (`switch`, `power off`,
        `power on`) (str)
        :raises ValueError: when state is not one of the accepted values
        :return: void method
        """
        if state == "switch":
            self._state_switch()
        elif state == "power off":
            self._turn_off()
        elif state == "power on":
            self._turn_on()
        else:
            raise ValueError("Invalid state.")
        print("Current relay state = {}".format(self.get_state()))


def _demo():
    """Toggle the output six times, then switch it on and off once."""
    # The context manager guarantees the port is closed if a write fails.
    with ArduinoControlService() as ar_s:
        for _ in range(6):
            ar_s.control("switch")
            print(ar_s.get_state())
            time.sleep(3)

        ar_s.control("power on")
        ar_s.control("power off")


# Guarded so that importing the module no longer opens the port and
# switches the output as a side effect.
if __name__ == "__main__":
    _demo()
a/README.md +++ b/README.md @@ -1,4 +1,6 @@ ## FunUtils +[![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2FHoussemCharf%2FFunUtils.svg?type=shield)](https://app.fossa.io/projects/git%2Bgithub.com%2FHoussemCharf%2FFunUtils?ref=badge_shield) + Some codes I wrote to help me with errands. it really made my life easier hope it does for you too ^^ @@ -116,6 +118,11 @@ list.json 20. **Clipboard Translator**: Program that automatically translates a text copied to the clipboard. +21. **Translate Excel**: Program that uses Google Translate to translate a row range of one column in a given Excel sheet. It asks for the target and source languages. +``` +python3 translate_excel.py <excel_file> <column> <start_row> <end_row> +``` + ## Authors * **Houssem Charfeddine** - *FunUtils* - [HC](https://github.com/HoussemCharf) @@ -123,7 +130,12 @@ list.json * **Shashank S** - *Backup,Get Comics,Wallpaper* - [talsperre](https://github.com/talsperre) * **Aditya Y** - *ToDo* - [Screwed-U-Head](https://github.com/Screwed-Up-Head) * **Bart E** - *Bad Link Filter* - [Callidus](https://github.com/Baev1) +* **Lukas Loukota** - *Translate Excel* - [loukotal](https://github.com/loukotal) + ## License codes are licensed under the MIT License - see the [LICENSE.md](LICENSE) file for details + + +[![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2FHoussemCharf%2FFunUtils.svg?type=large)](https://app.fossa.io/projects/git%2Bgithub.com%2FHoussemCharf%2FFunUtils?ref=badge_large) \ No newline at end of file diff --git a/Searching Algorithms/binary_search.py b/Searching Algorithms/binary_search.py new file mode 100644 index 0000000..644f98f --- /dev/null +++ b/Searching Algorithms/binary_search.py @@ -0,0 +1,29 @@ +# +# Binary search works for a sorted array. +# Note: The code logic is written for an array sorted in +# increasing order. 
# T(n): O(log n)
#
def binary_search(array, query):
    """Iteratively search ``array`` (sorted in increasing order) for ``query``.

    :param array: indexable sequence sorted in increasing order
    :param query: value to look for
    :return: index of an element equal to ``query``, or ``None`` when there
             is none (the recursive and linear variants return ``-1``)
    """
    lo, hi = 0, len(array) - 1
    while lo <= hi:
        mid = (hi + lo) // 2
        val = array[mid]
        if val == query:
            return mid
        if val < query:
            lo = mid + 1
        else:
            hi = mid - 1
    return None


def binary_search_recur(array, low, high, val):
    """Recursively search ``array[low..high]`` (both bounds inclusive) for ``val``.

    :param array: indexable sequence sorted in increasing order
    :param low: first index of the range still under consideration
    :param high: last index of the range still under consideration
    :param val: value to look for
    :return: index of an element equal to ``val``, or ``-1`` when absent
    """
    if low > high:  # empty range: the value is not present
        return -1
    mid = (low + high) // 2
    if val < array[mid]:
        return binary_search_recur(array, low, mid - 1, val)
    if val > array[mid]:
        return binary_search_recur(array, mid + 1, high, val)
    return mid


#
# Linear search works in any array.
#
# T(n): O(n)
#
def linear_search(array, query):
    """Scan ``array`` front to back for ``query``.

    :return: index of the first element equal to ``query``, or ``-1``
    """
    for index, value in enumerate(array):
        if value == query:
            return index
    return -1
def bucket_sort(arr):
    """Bucket Sort.

    Distributes the values over ``len(arr)`` buckets according to their
    position between the smallest and the largest value, sorts every bucket
    with ``next_sort`` and concatenates the buckets.

    Complexity: O(n^2) in the worst case, dominated by ``next_sort``.

    :param arr: sequence of numbers (negative values are supported)
    :return: new list with the values in increasing order; ``arr`` is untouched
    """
    if not arr:
        return []

    num_buckets = len(arr)
    # Computed once. Offsetting by the minimum keeps every index inside
    # 0..num_buckets-1, also for negative values.
    lowest, highest = min(arr), max(arr)
    span = highest - lowest + 1

    buckets = [[] for _ in range(num_buckets)]
    for value in arr:
        index = int((value - lowest) * num_buckets // span)
        buckets[index].append(value)

    sorted_list = []
    for bucket in buckets:
        sorted_list.extend(next_sort(bucket))
    return sorted_list


def next_sort(arr):
    """Sort ``arr`` in place with insertion sort and return it."""
    for i in range(1, len(arr)):
        key = arr[i]
        j = i - 1
        # Check the bound first so arr[-1] is never inspected by accident.
        while j >= 0 and arr[j] > key:
            arr[j + 1] = arr[j]
            j -= 1
        arr[j + 1] = key
    return arr


def insertion_sort(arr, simulation=False):
    """Insertion Sort.

    Complexity: O(n^2)

    :param arr: list to sort; it is sorted in place
    :param simulation: when true, print the list before sorting and after
                       every pass
    :return: the same list object, sorted in increasing order
    """
    iteration = 0
    if simulation:
        print("iteration", iteration, ":", *arr)

    for i in range(len(arr)):
        cursor = arr[i]
        pos = i

        # Shift larger elements one slot to the right ...
        while pos > 0 and arr[pos - 1] > cursor:
            arr[pos] = arr[pos - 1]
            pos -= 1
        # ... and drop the current element into the gap.
        arr[pos] = cursor

        if simulation:
            iteration += 1
            print("iteration", iteration, ":", *arr)

    return arr
"ibm": "recognize_ibm", + "sphinx": "recognize_sphinx", + "wit": "recognize_wit", + "azure": "recognize_azure" + +} + +class SpeechRecognizer: + + def __init__(self, recognition_api="google", language="en-us"): + self._recognizer = sr.Recognizer() + # below energy_threshold is considered silence, above speech + self._recognizer.energy_threshold = 500 + self._recognition_api = recognition_api + self._recognition_method = None + self._determine_recognition_method() + self._microphone = sr.Microphone() + self._language = language + + # public methods + def set_language(self, language): + """ + Sets recognition _language. + :param str language: _language code + :rtype: None + :return: void method + """ + assert (isinstance(language, str)) + self._language = language + + def get_language(self): + """ + Returns recognition _language. + :rtype:str + :return: recognition _language + """ + return self._language + + def recognize_from_microphone(self): + """ + Returns action result with recognized text from the speech. Input speech is read from microphone. Raises RequestError or + UnknownValueError. + :rtype: string + :return: recognized text from the speech + """ + audio = self._get_audio_from_microphone() + speech = self._recognition_method(audio, language=self._language) + return speech + + # private methods + def _determine_recognition_method(self): + """ + Determines and sets recognition method - based on the API name. 
# possible replacement pyttsx3
import os

from gtts import gTTS
from utils import get_current_timestamp, play_audio

# tts audio config
PATH_TO_AUDIO_DIR = r"audio/"
DEFAULT_AUDIO_FILE = PATH_TO_AUDIO_DIR + "temporary.mp3"

# NOTE:
# install dependencies:
# pip3 install gTTS
# pip3 install playsound


class Speaker:
    """Turn text into speech with gTTS, save it as an mp3 file and play it."""

    def __init__(self, language="en-us"):
        self._language = language
        # gTTS is created with placeholder text; the real text is assigned
        # in _speak() right before saving.
        self._tts = gTTS(lang=self._language, text="dummy")

    # public methods
    def set_language(self, language):
        """
        Sets operating speaking language.
        :param str language: language code
        :raises TypeError: if language is not a str
        :rtype: None
        :return: void method
        """
        # Explicit check instead of assert, which is stripped under -O.
        if not isinstance(language, str):
            raise TypeError("language must be a str")
        self._language = language
        self._tts.lang = self._language

    def get_language(self):
        """
        Returns speaking language.
        :rtype: str
        :return: speaking language
        """
        return self._language

    def save_speech_and_play(self, text=""):
        """
        Speak out the given text. An empty string is ignored. The audio is
        saved under a file named after the current timestamp.
        :param str text: text to be spoken
        :raises TypeError: if text is not a str
        :rtype: None
        :return: void method
        """
        if not isinstance(text, str):
            raise TypeError("text must be a str")
        if text != '':
            self._speak(text, str(get_current_timestamp()) + ".mp3")

    # private methods
    def _speak(self, text, file_name=DEFAULT_AUDIO_FILE):
        """
        Save the speech for the text and play the resulting audio file.
        :param str text: text to be spoken
        :param str file_name: audio file in which speech will be saved; any
            name other than the default is placed inside the audio directory
        :raises TypeError: if text is not a str
        :rtype: None
        :return: void method
        """
        if not isinstance(text, str):
            raise TypeError("text must be a str")
        if file_name != DEFAULT_AUDIO_FILE:
            file_name = PATH_TO_AUDIO_DIR + file_name
        # NOTE(review): presumably save() fails when the target directory is
        # missing, so create it up front - confirm against gTTS.
        os.makedirs(os.path.dirname(file_name) or ".", exist_ok=True)
        self._tts.text = text
        self._tts.save(file_name)
        play_audio(file_name)
import os
import sys

import googletrans
from openpyxl import load_workbook


def main():
    """Translate a row range of one column in an Excel sheet.

    Command line: ``translate_excel.py <workbook> <column> <first_row> <last_row>``.
    The source and target languages are asked for interactively. Every
    non-empty cell in the range of the active sheet is replaced by its
    translation, and the workbook is saved next to the input file with
    ``_translated.xlsx`` appended to the name (without its extension).

    :return: 1 when the number of arguments is wrong, otherwise None
    """
    if len(sys.argv) != 5:
        print("incorrect input parameters")
        return 1

    worksheet, column, first_row, last_row = sys.argv[1:5]

    wb = load_workbook(worksheet)

    # Selects active worksheet (excel List) (index 0 by default == 1st List)
    ws = wb.active

    # Selects the cells from <column><first_row> to <column><last_row>
    rows = ws[column + first_row:column + last_row]

    translator = googletrans.Translator()

    dest = input("Translate to (ie. cs, en, ..): ")
    src = input("Translate from: ")

    for row in rows:
        for cell in row:
            # Cells without a value are left untouched.
            if not cell.value:
                continue
            cell.value = translator.translate(cell.value, dest=dest, src=src).text

    # splitext removes only the final extension, so paths such as
    # "./data/book.v2.xlsx" keep their directory and the rest of the name.
    root, _extension = os.path.splitext(worksheet)
    save_str = root + "_translated.xlsx"

    # Saves the changes to a new file
    wb.save(save_str)
    print("Successfully translated!")


if __name__ == "__main__":
    # Pass the return value on so a usage error gives a non-zero exit status.
    sys.exit(main())
driver.page_source + +# Reading message and taking decisions based on the condition +def readMessage(): + + # A new tread will start at the specified time interval and will read the last available message + threading.Timer(5.0, readMessage).start() + url = driver.page_source + soup = bs(url, "lxml") + + try: + gotdiv = soup.find_all("div", { "class" : "msg msg-group" })[-1] + except IndexError: + gotdiv = 'null' + + if gotdiv == 'null': + div = soup.find_all("div", { "class" : "bubble bubble-text copyable-text" })[-1] + # print(div) + else: + div = soup.find_all("div", { "class" : "msg msg-group" })[-1] + + text = div.find_all('span') + print(text) + + try: + gottext = text[4].find_all(text=True)[1] + except IndexError: + gottext = 'null' + + if gottext == 'null': + div = soup.find_all("div", { "class" : "chat-title" })[-1] + name = div.find_all(text=True)[1] + try: + msg = text[-2].find_all(text=True)[1].lower() + except IndexError: + msg = "You replied last" + time = text[-1].find(text=True) + + else: #group + name = text[3].find_all(text=True)[1] + try: + msg = text[4].find_all(text=True)[1].lower() + except IndexError: + msg = "You replied last" + try: + time = text[-2].find(text=True) + except: + time = "None" + + + print(name, msg, time) + +# Getting appropriate reply from the csv +# Bot will lookup the csv for reply Only if the text contains the word buddy + +if "buddy" in msg: + + with open('dict.csv', "r") as f: + reader = csv.reader(f) + chat = {} + + for row in reader: + key = row[0] + chat[key] = row[1:] + try: + gotreply = chat[msg] + except KeyError: + gotreply = 'null' + + print(gotreply) diff --git a/heap/__init__.py b/heap/__init__.py new file mode 100644 index 0000000..9ea682d --- /dev/null +++ b/heap/__init__.py @@ -0,0 +1,5 @@ +from .binary_heap import * +from .skyline import * +from .sliding_window_max import * +from .merge_sorted_k_lists import * +from .k_closest_points import * diff --git a/heap/binary_heap.py b/heap/binary_heap.py new file 
r"""
Binary Heap. A min heap is a complete binary tree where each node is smaller
than its children. The root, therefore, is the minimum element in the tree.
The min heap uses an array to represent the data and operations. For example
a min heap:

     4
   /   \
  50    7
 / \   /
55 90 87

Heap [0, 4, 50, 7, 55, 90, 87]

Methods in class: insert, remove_min
For example insert(2) in a min heap:

     4                     4                     2
   /   \                 /   \                 /   \
  50    7      -->     50     2       -->    50     4
 / \   /  \           / \   /  \            / \   /  \
55 90 87   2         55 90 87   7          55 90 87   7

For example remove_min() in a min heap:

     4                     87                    7
   /   \                 /   \                 /   \
  50    7      -->     50     7       -->    50    87
 / \   /              / \                   / \
55 90 87             55 90                 55 90

"""
from abc import ABCMeta, abstractmethod


class AbstractHeap(metaclass=ABCMeta):
    """Abstract Class for Binary Heap."""

    def __init__(self):
        pass

    @abstractmethod
    def perc_up(self, i):
        pass

    @abstractmethod
    def insert(self, val):
        pass

    @abstractmethod
    def perc_down(self, i):
        pass

    @abstractmethod
    def min_child(self, i):
        pass

    @abstractmethod
    def remove_min(self):
        pass


class BinaryHeap(AbstractHeap):
    """Array-backed min heap.

    ``heap[0]`` is an unused placeholder so that the children of the node at
    index ``i`` sit at ``2 * i`` and ``2 * i + 1``. ``currentSize`` is the
    number of stored values.
    """

    def __init__(self):
        self.currentSize = 0
        self.heap = [0]

    def perc_up(self, i):
        """Move the value at index ``i`` up while it is smaller than its parent."""
        while i // 2 > 0:
            if self.heap[i] < self.heap[i // 2]:
                # Swap value of child with value of its parent
                self.heap[i], self.heap[i // 2] = self.heap[i // 2], self.heap[i]
            i = i // 2

    def insert(self, val):
        """Insert ``val``.

        The value is appended at the bottom, rightmost spot so the tree stays
        complete, then swapped with its parent until it reaches an
        appropriate position.
        Complexity: O(logN)
        """
        self.heap.append(val)
        self.currentSize = self.currentSize + 1
        self.perc_up(self.currentSize)

    def min_child(self, i):
        """Return the index of the smaller child of the node at index ``i``.

        The node must have at least a left child.
        """
        if 2 * i + 1 > self.currentSize:  # No right child
            return 2 * i
        # left child > right child
        if self.heap[2 * i] > self.heap[2 * i + 1]:
            return 2 * i + 1
        return 2 * i

    def perc_down(self, i):
        """Move the value at index ``i`` down while a child is smaller."""
        # "<=" so a node whose only child is the last element is compared too.
        while 2 * i <= self.currentSize:
            min_child = self.min_child(i)
            if self.heap[min_child] < self.heap[i]:
                # Swap min child with parent
                self.heap[min_child], self.heap[i] = self.heap[i], self.heap[min_child]
            i = min_child

    def remove_min(self):
        """Remove and return the minimum value.

        The root is replaced by the last element in the heap (the bottommost,
        rightmost one), which is then moved down until the min heap property
        is restored.
        Complexity: O(logN)

        :raises IndexError: if the heap is empty
        """
        if self.currentSize == 0:
            raise IndexError("remove_min from an empty heap")
        ret = self.heap[1]  # the smallest value at beginning
        self.heap[1] = self.heap[self.currentSize]  # Replace it by the last value
        self.currentSize = self.currentSize - 1
        self.heap.pop()
        self.perc_down(1)
        return ret
from heapq import heapify, heappushpop


def k_closest(points, k, origin=(0, 0)):
    """Return the ``k`` points closest to ``origin``.

    A heap of at most ``k`` entries keyed on the negated squared distance is
    maintained: heapq only provides a min heap, so negating the key puts the
    farthest kept point at the root, where the next candidate can push it out.

    Time: O(k + (n - k) log k)
    Space: O(k)

    :param points: sequence of (x, y) points
    :param k: number of points to keep; must not be negative
    :param origin: reference point, (0, 0) by default
    :return: list of the kept points, in heap order (not sorted by distance)
    :raises ValueError: if ``k`` is negative
    """
    if k < 0:
        # A negative k would silently slice from the end of the list.
        raise ValueError("k must not be negative")

    heap = [(-distance(p, origin), p) for p in points[:k]]
    heapify(heap)

    for p in points[k:]:
        # heappushpop pushes the candidate and pops the smallest key (the
        # farthest point) in a single O(log k) step, so the candidate only
        # stays when it is closer than the current farthest point.
        heappushpop(heap, (-distance(p, origin), p))

    return [p for _, p in heap]


def distance(point, origin=(0, 0)):
    """Return the squared Euclidean distance between ``point`` and ``origin``."""
    return (point[0] - origin[0])**2 + (point[1] - origin[1])**2
from heapq import heapify, heappop, heapreplace
from queue import PriorityQueue


class ListNode(object):
    """Node of a singly-linked list."""

    def __init__(self, x):
        self.val = x
        self.next = None


def merge_k_lists(lists):
    """Merge sorted linked lists into one sorted list using a heap.

    The input nodes are relinked, not copied. With n nodes in total and k
    lists the heap never holds more than k entries: O(n log k).

    :param lists: iterable of list heads (``None`` for an empty list)
    :return: head of the merged list, or ``None`` if there are no nodes
    """
    dummy = tail = ListNode(0)
    # The position of the source list breaks ties between equal values, so
    # the nodes themselves (which define no ordering) are never compared.
    heap = [(head.val, order, head) for order, head in enumerate(lists) if head]
    heapify(heap)
    while heap:
        _, order, node = heap[0]
        if node.next is None:
            heappop(heap)  # only change heap size when necessary
        else:
            heapreplace(heap, (node.next.val, order, node.next))
        tail.next = node
        tail = node

    return dummy.next


def merge_k_lists_pq(lists):
    """Same merge as ``merge_k_lists``, built on ``queue.PriorityQueue``.

    :param lists: iterable of list heads (``None`` for an empty list)
    :return: head of the merged list, or ``None`` if there are no nodes
    """
    dummy = ListNode(None)
    curr = dummy
    q = PriorityQueue()
    for order, head in enumerate(lists):
        if head:
            q.put((head.val, order, head))
    while not q.empty():
        _, order, node = q.get()
        curr.next = node
        curr = node
        if curr.next:
            q.put((curr.next.val, order, curr.next))
    return dummy.next


# Alternative without a heap or priority queue: merge the lists pairwise,
# like the levels of a full binary tree read from the bottom up. Merging two
# sorted lists is linear in their combined length, every level touches each
# of the n nodes once, and there are log k levels, so this is O(n log k) too.
#
# For example, with 8 lists of lengths x1..x8 (total n):
#
#   level 3: x1+x2, x3+x4, x5+x6, x7+x8     sum: n
#   level 2: x1+x2+x3+x4, x5+x6+x7+x8       sum: n
#   level 1: x1+x2+x3+x4+x5+x6+x7+x8        sum: n
+Now suppose you are given the locations and height of all the buildings +as shown on a cityscape photo (Figure A), +write a program to output the skyline formed by these buildings collectively (Figure B). + +The geometric information of each building is represented by a triplet of integers [Li, Ri, Hi], +where Li and Ri are the x coordinates of the left and right edge of the ith building, respectively, +and Hi is its height. It is guaranteed that 0 ≤ Li, Ri ≤ INT_MAX, 0 < Hi ≤ INT_MAX, and Ri - Li > 0. +You may assume all buildings are perfect rectangles grounded on an absolutely flat surface at height 0. + +For instance, the dimensions of all buildings in Figure A are recorded as: +[ [2 9 10], [3 7 15], [5 12 12], [15 20 10], [19 24 8] ] . + +The output is a list of "key points" (red dots in Figure B) in the format of +[ [x1,y1], [x2, y2], [x3, y3], ... ] +that uniquely defines a skyline. +A key point is the left endpoint of a horizontal line segment. Note that the last key point, +where the rightmost building ends, +is merely used to mark the termination of the skyline, and always has zero height. +Also, the ground in between any two adjacent buildings should be considered part of the skyline contour. + +For instance, the skyline in Figure B should be represented as:[ [2 10], [3 15], [7 12], [12 0], [15 10], [20 8], [24, 0] ]. + +Notes: + +The number of buildings in any input list is guaranteed to be in the range [0, 10000]. +The input list is already sorted in ascending order by the left x position Li. +The output list must be sorted by the x position. +There must be no consecutive horizontal lines of equal height in the output skyline. For instance, +[...[2 3], [4 5], [7 5], [11 5], [12 7]...] is not acceptable; the three lines of height 5 should be merged +into one in the final output as such: [...[2 3], [4 5], [12 7], ...] 
import heapq


def get_skyline(lrh):
    """Compute the skyline key points of a set of buildings.

    Worst Time Complexity: O(NlogN)

    :param lrh: buildings as [left, right, height] triplets, sorted in
                ascending order by their left x position
    :type lrh: List[List[int]]
    :return: key points [x, height], sorted by x, without two consecutive
             points of the same height
    :rtype: List[List[int]]
    """
    skyline = []
    # Buildings overlapping the sweep position, stored as
    # (-height, -right) so the tallest one is at the root of the min heap.
    live = []
    i, n = 0, len(lrh)
    while i < n or live:
        if not live or (i < n and lrh[i][0] <= -live[0][1]):
            # The next building starts before the tallest live one ends:
            # add every building that starts at this x.
            x = lrh[i][0]
            while i < n and lrh[i][0] == x:
                heapq.heappush(live, (-lrh[i][2], -lrh[i][1]))
                i += 1
        else:
            # The tallest live building ends first: drop every building at
            # the root that has ended by this x.
            x = -live[0][1]
            while live and -live[0][1] <= x:
                heapq.heappop(live)
        height = -live[0][0] if live else 0
        if not skyline or height != skyline[-1][1]:
            skyline.append([x, height])
    return skyline
import collections


def max_sliding_window(nums, k):
    """Return the maximum of every window of size ``k`` sliding over ``nums``.

    Each index enters and leaves the candidate deque at most once, so the
    whole scan is O(n) instead of taking ``max`` of every window.

    :type nums: List[int]
    :type k: int
    :rtype: List[int]
    :return: one maximum per window position; ``nums`` itself when it is
             empty; a single overall maximum when ``k`` is at least
             ``len(nums)``
    :raises ValueError: if ``nums`` is not empty and ``k`` is not positive
    """
    if not nums:
        return nums
    if k <= 0:
        raise ValueError("k must be a positive integer")
    if k >= len(nums):
        # The only window is the whole input.
        return [max(nums)]

    # Indices of candidates for the maximum; their values are strictly
    # decreasing from left to right, so the front is the window maximum.
    candidates = collections.deque()
    res = []
    for i, num in enumerate(nums):
        # Smaller-or-equal values to the left can never be a maximum again.
        while candidates and nums[candidates[-1]] <= num:
            candidates.pop()
        candidates.append(i)
        # Drop the front once it has slid out of the window.
        if candidates[0] <= i - k:
            candidates.popleft()
        if i >= k - 1:
            res.append(nums[candidates[0]])
    return res
def bench_k_means(estimator, name, data, labels=None):
    """
    Fit ``estimator`` on ``data`` and print one line of clustering scores.

    The printed line contains ``name``, the fitted estimator's ``inertia_``
    and six scores computed with ``sklearn.metrics``.

    :param estimator: clustering estimator exposing ``fit``, ``inertia_`` and ``labels_``
    :param name: label printed at the start of the line
    :param data: samples passed to ``estimator.fit`` and to the silhouette score
    :param labels: ground-truth class of every sample; when omitted, the
                   module-level ``y`` is used, as this function originally did
    :return: None
    """
    estimator.fit(data)
    if labels is None:
        # Backward compatibility: older callers rely on the global ``y``.
        labels = y
    predicted = estimator.labels_

    # A short explanation for every score:
    # homogeneity: each cluster contains only members of a single class (range 0 - 1)
    # completeness: all members of a given class are assigned to the same cluster (range 0 - 1)
    # v_measure: harmonic mean of homogeneity and completeness
    # adjusted_rand: similarity of the actual values and their predictions,
    #                ignoring permutations and with chance normalization
    #                (range -1 to 1, -1 being bad, 1 being perfect and 0 being random)
    # adjusted_mutual_info: agreement of the actual values and predictions, ignoring permutations
    #                (range 0 - 1, with 0 being random agreement and 1 being perfect agreement)
    # silhouette: uses the mean distance between a sample and all other points in the same class,
    #                as well as the mean distance between a sample and all other points in the
    #                nearest cluster to calculate a score (range: -1 to 1, with the former being
    #                incorrect and the latter standing for highly dense clustering;
    #                0 indicates overlapping clusters).
    print('%-9s \t%i \thomogeneity: %.3f \tcompleteness: %.3f \tv-measure: %.3f \tadjusted-rand: %.3f \t'
          'adjusted-mutual-info: %.3f \tsilhouette: %.3f'
          % (name, estimator.inertia_,
             metrics.homogeneity_score(labels, predicted),
             metrics.completeness_score(labels, predicted),
             metrics.v_measure_score(labels, predicted),
             metrics.adjusted_rand_score(labels, predicted),
             metrics.adjusted_mutual_info_score(labels, predicted),
             metrics.silhouette_score(data, predicted,
                                      metric='euclidean')))


def plot(kmeans, data):
    """
    Show the clusters found by ``kmeans`` on a 2-D PCA projection of ``data``.

    NOTE: ``kmeans`` is re-fitted on the 2-component projection, so after this
    call the estimator passed in is trained on the reduced data, not on ``data``.

    :param kmeans: clustering estimator exposing ``fit``, ``predict`` and ``cluster_centers_``
    :param data: samples to project and cluster
    :return: None
    """
    reduced_data = PCA(n_components=2).fit_transform(data)
    kmeans.fit(reduced_data)

    # Step size of the mesh. Decrease to increase the quality of the VQ.
    h = .01

    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max]x[y_min, y_max].
    x_min, x_max = reduced_data[:, 0].min() - 1, reduced_data[:, 0].max() + 1
    y_min, y_max = reduced_data[:, 1].min() - 1, reduced_data[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

    # Obtain labels for each point in mesh. Use last trained model.
    Z = kmeans.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.figure(1)
    plt.clf()
    plt.imshow(Z, interpolation='nearest',
               extent=(xx.min(), xx.max(), yy.min(), yy.max()),
               cmap=plt.cm.Paired,
               aspect='auto', origin='lower')

    plt.plot(reduced_data[:, 0], reduced_data[:, 1], 'k.', markersize=2)

    # Plot the centroids as a white X
    centroids = kmeans.cluster_centers_
    plt.scatter(centroids[:, 0], centroids[:, 1],
                marker='x', s=169, linewidths=3,
                color='w', zorder=10)
    plt.title('K-means clustering on the digits dataset (PCA-reduced data)\n'
              'Centroids are marked with white cross')
    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    plt.xticks(())
    plt.yticks(())
    plt.show()


# Loading and preparing the data
digits = load_digits()
data = scale(digits.data)
y = digits.target

# Number of clusters
k = len(np.unique(y))

samples, features = data.shape

# Defining the classifier
classifier = KMeans(n_clusters=k, init='k-means++', n_init=10, max_iter=300)

# Computing the score of this classifier (ground truth passed explicitly)
bench_k_means(classifier, "kmeans++", data, labels=y)

plot(classifier, data)
def read_data(filename):
    """
    Read the semicolon-separated CSV file and keep only the columns used by the model.

    :param filename: String, path of the CSV file
    :return: DataFrame with the columns G1, G2, studytime, failures, absences, G3
    """
    dat = pd.read_csv(filename, sep=";")
    return dat[["G1", "G2", "studytime", "failures", "absences", "G3"]]


def r_squared(pred, res):
    """
    Calculate the R² score (coefficient of determination) of the predictions.

    Computed as ``1 - SS_res / SS_tot``. 1.0 is a perfect fit; the higher the better.

    :param pred: List of predicted values
    :param res: List of actual values, same length as ``pred``
    :return: Float
    :raises ValueError: if ``pred`` and ``res`` differ in length
    :raises ZeroDivisionError: if all values in ``res`` are identical plain numbers
    """
    if len(pred) != len(res):
        raise ValueError("pred and res must have the same length")

    # The mean does not change inside the loop; compute it once.
    mean_res = statistics.mean(res)
    ss_t = 0
    ss_r = 0
    for predicted, actual in zip(pred, res):
        ss_t += (actual - mean_res) ** 2
        ss_r += (actual - predicted) ** 2

    return 1 - (ss_r / ss_t)


def rmse(pred, res):
    """
    Calculate the Root Mean Square Error.

    The lower the returned value, the better.

    :param pred: List of predicted values
    :param res: List of actual values, same length as ``pred``
    :return: Float
    :raises ValueError: if ``pred`` and ``res`` differ in length
    """
    if len(pred) != len(res):
        raise ValueError("pred and res must have the same length")

    squared_error = 0
    for predicted, actual in zip(pred, res):
        squared_error += (actual - predicted) ** 2
    return np.sqrt(squared_error / len(pred))


def get_cost(X, y, theta):
    """
    Get the cost for the current values of theta.

    The cost is the sum of squared residuals ``X @ theta.T - y`` divided by
    ``2 * len(X)``.

    :param X: numpy.ndarray
    :param y: numpy.ndarray
    :param theta: numpy.ndarray
    :return: Float
    """
    residuals = (X @ theta.T) - y
    return np.sum(np.power(residuals, 2)) / (2 * len(X))


def gradient_descent(X, y, theta, iterations, alpha):
    """
    Optimize the values of theta using batch gradient descent.

    :param X: numpy.ndarray
    :param y: numpy.ndarray
    :param theta: numpy.ndarray, starting values
    :param iterations: Integer, number of update steps
    :param alpha: Float, learning rate
    :return: numpy.ndarray, numpy.ndarray -- final theta and the cost after every step
    """
    cost = np.zeros(iterations)
    step = alpha / len(X)
    for i in range(iterations):
        residuals = (X @ theta.T) - y
        theta = theta - step * np.sum(X * residuals, axis=0)
        cost[i] = get_cost(X, y, theta)
    return theta, cost


def main():
    """Train on ``student-mat.csv``, plot the cost curve and print the test scores."""
    data = read_data("student-mat.csv")

    # Splitting the data in two batches.
    # 70% training data, 30% test data
    train = data.sample(frac=0.7)
    test = data.drop(train.index)

    # Preparing 2 numpy arrays.
    # X will hold all data except G3 (plus a leading column of ones) and y only holds G3
    X = train.iloc[:, :5]
    ones = np.ones([X.shape[0], 1])
    X = np.concatenate((ones, X), axis=1)

    y = train.iloc[:, -1:].values

    # Initializing theta
    theta = np.zeros([1, 6])

    # Setting hyper parameters
    alpha = 0.00001
    iterations = 5000

    # Training the model.
    # This means optimizing the cost via gradient descent and calculating the final cost.
    theta, cost = gradient_descent(X, y, theta, iterations, alpha)
    final_cost = get_cost(X, y, theta)

    # Plotting the cost in relation to the iteration
    fig, ax = plt.subplots()
    ax.plot(np.arange(iterations), cost, 'r')
    ax.set_xlabel('Iterations')
    ax.set_ylabel('Cost')
    ax.set_title('Error vs. Training Epoch')
    plt.show()

    print("Final cost: ", final_cost)

    # Initializing the test set
    X_test = test.iloc[:, :5].values.tolist()

    # Transforming y_test from [[10],[4],...,[20]] to a simple list [10, 4, ..., 20]
    y_test = [entry[0] for entry in test.iloc[:, -1:].values.tolist()]

    # Calculating predictions using the function theta1 + (theta2 * x1) + ... + (theta6 * x5)
    intercept, *weights = theta.tolist()[0]
    predictions = []
    for line in X_test:
        estimate = intercept
        for weight, value in zip(weights, line):
            estimate += weight * value
        predictions.append(round(estimate))

    # Printing the score of the model
    print("RMSE-Score: ", rmse(predictions, y_test))
    print("R²-Score:", r_squared(predictions, y_test))


if __name__ == "__main__":
    # Guarded so that importing the helpers does not train a model or open a plot.
    main()
import math


def distance(x, y):
    """
    HELPER-FUNCTION
    Calculates the (Euclidean) distance between vector x and y.

    Arguments:
        x {tuple} -- vector
        y {tuple} -- vector

    Returns:
        float -- square root of the summed squared component differences

    Raises:
        AssertionError -- if the vectors differ in length
    """
    if len(x) != len(y):
        # Raised explicitly (rather than via ``assert``) so the check still
        # runs under ``python -O``; the exception type stays the same.
        raise AssertionError("The vector must have same length")
    return math.sqrt(sum((a - b) ** 2 for a, b in zip(x, y)))


def nearest_neighbor(x, tSet):
    """
    Implements the nearest neighbor algorithm: returns the value stored for
    the training key with the smallest distance to x. If several keys are
    equally close, the first one in the dict's iteration order wins.

    Arguments:
        x {tuple} -- vector
        tSet {dict} -- training set, mapping vectors (tuples) to results

    Returns:
        the value of tSet belonging to the key nearest to x

    Raises:
        AssertionError -- if x is not a tuple or tSet is not a dict
        KeyError -- if the training set is empty
    """
    if not (isinstance(x, tuple) and isinstance(tSet, dict)):
        raise AssertionError("x must be a tuple and tSet must be a dict")
    if not tSet:
        raise KeyError("the training set is empty")
    nearest_key = min(tSet, key=lambda key: distance(x, key))
    return tSet[nearest_key]

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy